changeset 87a6f2ed585a in /z/repo/m5 details: http://repo.m5sim.org/m5?cmd=changeset;node=87a6f2ed585a description: inorder: add a fetch buffer to fetch unit Give fetch unit its own parameterizable fetch buffer to read from. Very inefficient (architecturally and in simulation) to continually fetch at the granularity of the word size. As expected, the number of fetch memory requests drops dramatically.
diffstat: src/cpu/inorder/InOrderCPU.py | 5 +- src/cpu/inorder/resources/cache_unit.cc | 76 +++-- src/cpu/inorder/resources/cache_unit.hh | 18 +- src/cpu/inorder/resources/fetch_unit.cc | 358 ++++++++++++++++++++++++------- src/cpu/inorder/resources/fetch_unit.hh | 51 +++- 5 files changed, 371 insertions(+), 137 deletions(-) diffs (truncated from 801 to 300 lines): diff -r fa81553d67ea -r 87a6f2ed585a src/cpu/inorder/InOrderCPU.py --- a/src/cpu/inorder/InOrderCPU.py Fri Feb 04 00:08:21 2011 -0500 +++ b/src/cpu/inorder/InOrderCPU.py Fri Feb 04 00:08:22 2011 -0500 @@ -48,6 +48,9 @@ dcache_port = Port("Data Port") _cached_ports = ['icache_port', 'dcache_port'] + fetchBuffSize = Param.Unsigned(4, "Fetch Buffer Size (Number of Cache Blocks Stored)") + memBlockSize = Param.Unsigned(64, "Memory Block Size") + predType = Param.String("tournament", "Branch predictor type ('local', 'tournament')") localPredictorSize = Param.Unsigned(2048, "Size of local predictor") localCtrBits = Param.Unsigned(2, "Bits per counter") @@ -69,8 +72,6 @@ functionTraceStart = Param.Tick(0, "Cycle to start function trace") stageTracing = Param.Bool(False, "Enable tracing of each stage in CPU") - memBlockSize = Param.Unsigned(64, "Memory Block Size") - multLatency = Param.Unsigned(1, "Latency for Multiply Operations") multRepeatRate = Param.Unsigned(1, "Repeat Rate for Multiply Operations") div8Latency = Param.Unsigned(1, "Latency for 8-bit Divide Operations") diff -r fa81553d67ea -r 87a6f2ed585a src/cpu/inorder/resources/cache_unit.cc --- a/src/cpu/inorder/resources/cache_unit.cc Fri Feb 04 00:08:21 2011 -0500 +++ b/src/cpu/inorder/resources/cache_unit.cc Fri Feb 04 00:08:22 2011 -0500 @@ -97,7 +97,7 @@ CacheUnit::CacheUnit(string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - cachePortBlocked(false), predecoder(NULL) + cachePortBlocked(false) { cachePort = new CachePort(this); @@ 
-137,6 +137,9 @@ // Switch to Timing TLB translations. resourceEvent = new CacheUnitEvent[width]; + cacheBlkSize = this->cachePort->peerBlockSize(); + cacheBlkMask = cacheBlkSize - 1; + initSlots(); } @@ -375,28 +378,20 @@ } } -Fault -CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size, - int flags, TheISA::TLB::Mode tlb_mode) +void +CacheUnit::setupMemRequest(DynInstPtr inst, CacheReqPtr cache_req, + int acc_size, int flags) { ThreadID tid = inst->readTid(); Addr aligned_addr = inst->getMemAddr(); - unsigned stage_num = cache_req->getStageNum(); - unsigned slot_idx = cache_req->getSlot(); - if (tlb_mode == TheISA::TLB::Execute) { - inst->fetchMemReq = - new Request(inst->readTid(), aligned_addr, acc_size, flags, - inst->instAddr(), cpu->readCpuId(), inst->readTid()); - cache_req->memReq = inst->fetchMemReq; - } else { - if (!cache_req->is2ndSplit()) { + if (!cache_req->is2ndSplit()) { inst->dataMemReq = new Request(cpu->asid[tid], aligned_addr, acc_size, flags, inst->instAddr(), cpu->readCpuId(), - inst->readTid()); + tid); cache_req->memReq = inst->dataMemReq; - } else { + } else { assert(inst->splitInst); inst->splitMemReq = new Request(cpu->asid[tid], @@ -407,9 +402,19 @@ cpu->readCpuId(), tid); cache_req->memReq = inst->splitMemReq; - } } - +} + +Fault +CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size, + int flags, TheISA::TLB::Mode tlb_mode) +{ + ThreadID tid = inst->readTid(); + //Addr aligned_addr = inst->getMemAddr(); + unsigned stage_num = cache_req->getStageNum(); + unsigned slot_idx = cache_req->getSlot(); + + setupMemRequest(inst, cache_req, acc_size, flags); cache_req->fault = _tlb->translateAtomic(cache_req->memReq, @@ -842,8 +847,8 @@ } DPRINTF(InOrderCachePort, - "[tid:%i] [sn:%i] attempting to access cache\n", - tid, inst->seqNum); + "[tid:%i] [sn:%i] attempting to access cache for addr %08p\n", + tid, inst->seqNum, cache_req->dataPkt->getAddr()); if (do_access) { if 
(!cachePort->sendTiming(cache_req->dataPkt)) { @@ -1086,6 +1091,24 @@ squash(inst, stage_num, squash_seq_num + 1, tid); } +void +CacheUnit::squashCacheRequest(CacheReqPtr req_ptr) +{ + DynInstPtr inst = req_ptr->getInst(); + + req_ptr->setSquashed(); + inst->setSquashed(); + if (inst->validMemAddr()) { + DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to " + "remove addr. %08p dependencies.\n", + inst->readTid(), + inst->seqNum, + inst->getMemAddr()); + + removeAddrDependency(inst); + } +} + void CacheUnit::squash(DynInstPtr inst, int stage_num, @@ -1115,14 +1138,12 @@ map_it++; continue; } - - req_ptr->setSquashed(); - - req_ptr->getInst()->setSquashed(); CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(req_ptr); assert(cache_req); + squashCacheRequest(cache_req); + int req_slot_num = req_ptr->getSlot(); if (cache_req->tlbStall) { @@ -1152,15 +1173,6 @@ req_ptr->getInst()->splitInst); } - if (req_ptr->getInst()->validMemAddr()) { - DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to " - "remove addr. %08p dependencies.\n", - req_ptr->getInst()->readTid(), - req_ptr->getInst()->seqNum, - req_ptr->getInst()->getMemAddr()); - - removeAddrDependency(req_ptr->getInst()); - } } map_it++; diff -r fa81553d67ea -r 87a6f2ed585a src/cpu/inorder/resources/cache_unit.hh --- a/src/cpu/inorder/resources/cache_unit.hh Fri Feb 04 00:08:21 2011 -0500 +++ b/src/cpu/inorder/resources/cache_unit.hh Fri Feb 04 00:08:22 2011 -0500 @@ -139,10 +139,16 @@ void squashDueToMemStall(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + virtual void squashCacheRequest(CacheReqPtr req_ptr); + /** After memory request is completedd in the cache, then do final processing to complete the request in the CPU. 
*/ - virtual void processCacheCompletion(PacketPtr pkt); + virtual void processCacheCompletion(PacketPtr pkt); + + /** Create request that will interface w/TLB and Memory objects */ + virtual void setupMemRequest(DynInstPtr inst, CacheReqPtr cache_req, + int acc_size, int flags); void recvRetry(); @@ -167,7 +173,7 @@ uint64_t getMemData(Packet *packet); void setAddrDependency(DynInstPtr inst); - void removeAddrDependency(DynInstPtr inst); + virtual void removeAddrDependency(DynInstPtr inst); protected: /** Cache interface. */ @@ -190,8 +196,6 @@ return (addr & ~(cacheBlkMask)); } - TheISA::Predecoder predecoder; - bool tlbBlocked[ThePipeline::MaxThreads]; TheISA::TLB* tlb(); @@ -225,7 +229,7 @@ pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL), retryPkt(NULL), memAccComplete(false), memAccPending(false), tlbStall(false), splitAccess(false), splitAccessNum(-1), - split2ndAccess(false), instIdx(idx) + split2ndAccess(false), instIdx(idx), fetchBufferFill(false) { } @@ -270,7 +274,9 @@ int splitAccessNum; bool split2ndAccess; int instIdx; - + + /** Should we expect block from cache access or fetch buffer? 
*/ + bool fetchBufferFill; }; class CacheReqPacket : public Packet diff -r fa81553d67ea -r 87a6f2ed585a src/cpu/inorder/resources/fetch_unit.cc --- a/src/cpu/inorder/resources/fetch_unit.cc Fri Feb 04 00:08:21 2011 -0500 +++ b/src/cpu/inorder/resources/fetch_unit.cc Fri Feb 04 00:08:22 2011 -0500 @@ -37,6 +37,7 @@ #include "arch/utility.hh" #include "arch/predecoder.hh" #include "config/the_isa.hh" +#include "cpu/inorder/resources/cache_unit.hh" #include "cpu/inorder/resources/fetch_unit.hh" #include "cpu/inorder/pipeline_traits.hh" #include "cpu/inorder/cpu.hh" @@ -50,10 +51,42 @@ FetchUnit::FetchUnit(string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) - : CacheUnit(res_name, res_id, res_width, res_latency, _cpu, - params) + : CacheUnit(res_name, res_id, res_width, res_latency, _cpu, params), + instSize(sizeof(TheISA::MachInst)), fetchBuffSize(params->fetchBuffSize), + predecoder(NULL) { } +void +FetchUnit::createMachInst(std::list<FetchBlock*>::iterator fetch_it, + DynInstPtr inst) +{ + ExtMachInst ext_inst; + Addr block_addr = cacheBlockAlign(inst->getMemAddr()); + Addr fetch_addr = inst->getMemAddr(); + unsigned fetch_offset = (fetch_addr - block_addr) / instSize; + ThreadID tid = inst->readTid(); + TheISA::PCState instPC = inst->pcState(); + + + DPRINTF(InOrderCachePort, "Creating instruction [sn:%i] w/fetch data @" + "addr:%08p block:%08p\n", inst->seqNum, fetch_addr, block_addr); + + assert((*fetch_it)->valid); + + TheISA::MachInst *fetchInsts = + reinterpret_cast<TheISA::MachInst *>((*fetch_it)->block); + + MachInst mach_inst = + TheISA::gtoh(fetchInsts[fetch_offset]); + + predecoder.setTC(cpu->thread[tid]->getTC()); + predecoder.moreBytes(instPC, inst->instAddr(), mach_inst); + ext_inst = predecoder.getExtMachInst(instPC); + + inst->pcState(instPC); + inst->setMachInst(ext_inst); +} + int FetchUnit::getSlot(DynInstPtr inst) { @@ -119,15 +152,64 @@ int acc_size, int flags) { ThreadID tid = 
inst->readTid(); - Addr aligned_addr = inst->getMemAddr(); + Addr aligned_addr = cacheBlockAlign(inst->getMemAddr()); inst->fetchMemReq = - new Request(inst->readTid(), aligned_addr, acc_size, flags, - inst->instAddr(), cpu->readCpuId(), inst->readTid()); + new Request(tid, aligned_addr, acc_size, flags, + inst->instAddr(), cpu->readCpuId(), tid); cache_req->memReq = inst->fetchMemReq; } _______________________________________________ m5-dev mailing list m5-dev@m5sim.org http://m5sim.org/mailman/listinfo/m5-dev