Aweseome. My concern is that the read/write functions are getting long and hard to read. Is there anything that makes sense to break out into a second function? (i.e. the actual unaligned handling part? Maybe a readUnaligned and writeUnaligned function with just a simple check in read/write to see if it needs to call the other?
Nate On Sat, Nov 8, 2008 at 12:08 AM, <[EMAIL PROTECTED]> wrote: > # HG changeset patch > # User Gabe Black <[EMAIL PROTECTED]> > # Date 1226044295 28800 > # Node ID d562cd16ff32bbdb73a5f571e40496a039092e52 > # Parent 2e61b60e6614e026ba055946a45c5f577d8d8ff8 > CPU: Make unaligned accesses work in the timing simple CPU. > > diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc > --- a/src/cpu/simple/timing.cc > +++ b/src/cpu/simple/timing.cc > @@ -241,57 +241,130 @@ > _status = Idle; > } > > +void > +TimingSimpleCPU::handleReadPacket(PacketPtr pkt) > +{ > + RequestPtr req = pkt->req; > + if (req->isMmapedIpr()) { > + Tick delay; > + delay = TheISA::handleIprRead(thread->getTC(), pkt); > + new IprEvent(pkt, this, nextCycle(curTick + delay)); > + _status = DcacheWaitResponse; > + dcache_pkt = NULL; > + } else if (!dcachePort.sendTiming(pkt)) { > + _status = DcacheRetry; > + dcache_pkt = pkt; > + } else { > + _status = DcacheWaitResponse; > + // memory system takes ownership of packet > + dcache_pkt = NULL; > + } > +} > > template <class T> > Fault > TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) > { > - Request *req = > - new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(), > - _cpuId, /* thread ID */ 0); > + Fault fault; > + const int asid = 0; > + const int threadId = 0; > + const Addr pc = thread->readPC(); > > - if (traceData) { > - traceData->setAddr(req->getVaddr()); > + PacketPtr pkt; > + RequestPtr req; > + > + int blockSize = dcachePort.peerBlockSize(); > + int dataSize = sizeof(T); > + > + Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); > + > + if (secondAddr > addr) { > + Addr firstSize = secondAddr - addr; > + Addr secondSize = dataSize - firstSize; > + // Make sure we'll only need two accesses. > + assert(roundDown(secondAddr + secondSize - 1, blockSize) == > + secondAddr); > + > + /* > + * Do the translations. If something isn't going to work, find out > + * before we waste time setting up anything else. > + */ > + req = new Request(asid, addr, firstSize, flags, pc, _cpuId, > threadId); > + fault = thread->translateDataReadReq(req); > + if (fault != NoFault) { > + delete req; > + return fault; > + } > + Request *secondReq = > + new Request(asid, secondAddr, secondSize, > + flags, pc, _cpuId, threadId); > + fault = thread->translateDataReadReq(secondReq); > + if (fault != NoFault) { > + delete req; > + delete secondReq; > + return fault; > + } > + > + // This is the packet we'll process now. > + pkt = new Packet(req, > + (req->isLocked() ? > + MemCmd::LoadLockedReq : MemCmd::ReadReq), > + Packet::Broadcast); > + pkt->dataDynamic<uint8_t>(new uint8_t[firstSize]); > + SplitSenderState *sendState = new SplitSenderState; > + pkt->senderState = sendState; > + > + // This is the second half of the access we'll deal with later. > + PacketPtr secondPkt = > + new Packet(secondReq, > + (secondReq->isLocked() ? > + MemCmd::LoadLockedReq : MemCmd::ReadReq), > + Packet::Broadcast); > + secondPkt->dataDynamic<uint8_t>(new uint8_t[secondSize]); > + sendState->secondPkt = secondPkt; > + > + /* > + * This is the big packet that will hold the data we've gotten so > far, > + * if any, and also act as the response we actually give to the > + * instruction. > + */ > + Request *origReq = > + new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId); > + origReq->setPhys(req->getPaddr(), dataSize, flags); > + PacketPtr bigPkt = > + new Packet(origReq, MemCmd::ReadResp, Packet::Broadcast); > + bigPkt->dataDynamic<T>(new T); > + sendState->bigPkt = bigPkt; > + } else { > + req = new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId); > + > + // translate to physical address > + Fault fault = thread->translateDataReadReq(req); > + > + if (fault != NoFault) { > + delete req; > + return fault; > + } > + > + pkt = new Packet(req, > + (req->isLocked() ? > + MemCmd::LoadLockedReq : MemCmd::ReadReq), > + Packet::Broadcast); > + pkt->dataDynamic<T>(new T); > } > > - // translate to physical address > - Fault fault = thread->translateDataReadReq(req); > - > - // Now do the access. > - if (fault == NoFault) { > - PacketPtr pkt = > - new Packet(req, > - (req->isLocked() ? > - MemCmd::LoadLockedReq : MemCmd::ReadReq), > - Packet::Broadcast); > - pkt->dataDynamic<T>(new T); > - > - if (req->isMmapedIpr()) { > - Tick delay; > - delay = TheISA::handleIprRead(thread->getTC(), pkt); > - new IprEvent(pkt, this, nextCycle(curTick + delay)); > - _status = DcacheWaitResponse; > - dcache_pkt = NULL; > - } else if (!dcachePort.sendTiming(pkt)) { > - _status = DcacheRetry; > - dcache_pkt = pkt; > - } else { > - _status = DcacheWaitResponse; > - // memory system takes ownership of packet > - dcache_pkt = NULL; > - } > - > - // This will need a new way to tell if it has a dcache attached. > - if (req->isUncacheable()) > - recordEvent("Uncached Read"); > - } else { > - delete req; > - } > + handleReadPacket(pkt); > > if (traceData) { > traceData->setData(data); > + traceData->setAddr(addr); > } > - return fault; > + > + // This will need a new way to tell if it has a dcache attached. > + if (req->isUncacheable()) > + recordEvent("Uncached Read"); > + > + return NoFault; > } > > Fault > @@ -364,26 +437,129 @@ > return read(addr, (uint32_t&)data, flags); > } > > +void > +TimingSimpleCPU::handleWritePacket() > +{ > + RequestPtr req = dcache_pkt->req; > + if (req->isMmapedIpr()) { > + Tick delay; > + delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt); > + new IprEvent(dcache_pkt, this, nextCycle(curTick + delay)); > + _status = DcacheWaitResponse; > + dcache_pkt = NULL; > + } else if (!dcachePort.sendTiming(dcache_pkt)) { > + _status = DcacheRetry; > + } else { > + _status = DcacheWaitResponse; > + // memory system takes ownership of packet > + dcache_pkt = NULL; > + } > +} > > template <class T> > Fault > TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) > { > - Request *req = > - new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(), > - _cpuId, /* thread ID */ 0); > + const int asid = 0; > + const int threadId = 0; > + bool do_access = true; // flag to suppress cache access > + const Addr pc = thread->readPC(); > > - if (traceData) { > - traceData->setAddr(req->getVaddr()); > - } > + RequestPtr req; > > - // translate to physical address > - Fault fault = thread->translateDataWriteReq(req); > + int blockSize = dcachePort.peerBlockSize(); > + int dataSize = sizeof(T); > > - // Now do the access. > - if (fault == NoFault) { > + Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); > + > + if (secondAddr > addr) { > + Fault fault; > + Addr firstSize = secondAddr - addr; > + Addr secondSize = dataSize - firstSize; > + // Make sure we'll only need two accesses. > + assert(roundDown(secondAddr + secondSize - 1, blockSize) == > + secondAddr); > + > + req = new Request(asid, addr, firstSize, flags, pc, _cpuId, > threadId); > + fault = thread->translateDataWriteReq(req); > + if (fault != NoFault) { > + delete req; > + return fault; > + } > + RequestPtr secondReq = new Request(asid, secondAddr, secondSize, > + flags, pc, _cpuId, threadId); > + fault = thread->translateDataWriteReq(secondReq); > + if (fault != NoFault) { > + delete req; > + delete secondReq; > + return fault; > + } > + > MemCmd cmd = MemCmd::WriteReq; // default > - bool do_access = true; // flag to suppress cache access > + if (req->isLocked()) { > + cmd = MemCmd::StoreCondReq; > + do_access = TheISA::handleLockedWrite(thread, req); > + } else if (req->isSwap()) { > + panic("Conditional swaps can't be split."); > + } > + > + MemCmd secondCmd = MemCmd::WriteReq; // default > + bool secondDoAccess = do_access; > + if (secondReq->isLocked()) { > + secondCmd = MemCmd::StoreCondReq; > + secondDoAccess = TheISA::handleLockedWrite(thread, secondReq); > + } else if (req->isSwap()) { > + panic("Conditional swaps can't be split."); > + } > + > + if (do_access != secondDoAccess) { > + panic("Both parts of a split store conditional" > + " must succeed or fail.\n"); > + } > + > + uint8_t *dataPtr; > + > + assert(dcache_pkt == NULL); > + // This is the packet we'll process now. > + dcache_pkt = new Packet(req, cmd, Packet::Broadcast); > + dataPtr = new uint8_t[firstSize]; > + memcpy(dataPtr, &data, firstSize); > + dcache_pkt->dataDynamic(dataPtr); > + SplitSenderState *sendState = new SplitSenderState; > + dcache_pkt->senderState = sendState; > + > + // This is the second half of the access we'll deal with later. > + PacketPtr secondPkt = > + new Packet(secondReq, secondCmd, Packet::Broadcast); > + dataPtr = new uint8_t[secondSize]; > + memcpy(dataPtr, ((uint8_t *)&data) + firstSize, secondSize); > + secondPkt->dataDynamic<uint8_t>(dataPtr); > + sendState->secondPkt = secondPkt; > + > + /* > + * This is the big packet that will hold the data we've gotten so > far, > + * if any, and also act as the response we actually give to the > + * instruction. > + */ > + RequestPtr origReq = > + new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId); > + origReq->setPhys(req->getPaddr(), dataSize, flags); > + PacketPtr bigPkt = > + new Packet(origReq, MemCmd::WriteResp, Packet::Broadcast); > + sendState->bigPkt = bigPkt; > + bigPkt->allocate(); > + bigPkt->set(data); > + } else { > + req = new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId); > + > + // translate to physical address > + Fault fault = thread->translateDataWriteReq(req); > + if (fault != NoFault) { > + delete req; > + return fault; > + } > + > + MemCmd cmd = MemCmd::WriteReq; // default > > if (req->isLocked()) { > cmd = MemCmd::StoreCondReq; > @@ -401,38 +577,27 @@ > assert(dcache_pkt == NULL); > dcache_pkt = new Packet(req, cmd, Packet::Broadcast); > dcache_pkt->allocate(); > - dcache_pkt->set(data); > - > - if (do_access) { > - if (req->isMmapedIpr()) { > - Tick delay; > - dcache_pkt->set(htog(data)); > - delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt); > - new IprEvent(dcache_pkt, this, nextCycle(curTick + delay)); > - _status = DcacheWaitResponse; > - dcache_pkt = NULL; > - } else if (!dcachePort.sendTiming(dcache_pkt)) { > - _status = DcacheRetry; > - } else { > - _status = DcacheWaitResponse; > - // memory system takes ownership of packet > - dcache_pkt = NULL; > - } > - } > - // This will need a new way to tell if it's hooked up to a cache or > not. > - if (req->isUncacheable()) > - recordEvent("Uncached Write"); > - } else { > - delete req; > + if (req->isMmapedIpr()) > + dcache_pkt->set(htog(data)); > + else > + dcache_pkt->set(data); > } > > + if (do_access) > + handleWritePacket(); > + > if (traceData) { > + traceData->setAddr(req->getVaddr()); > traceData->setData(data); > } > > + // This will need a new way to tell if it's hooked up to a cache or not. > + if (req->isUncacheable()) > + recordEvent("Uncached Write"); > + > // If the write needs to have a fault on the access, consider calling > // changeStatus() and changing it to "bad addr write" or something. > - return fault; > + return NoFault; > } > > Fault > @@ -721,11 +886,52 @@ > // received a response from the dcache: complete the load or store > // instruction > assert(!pkt->isError()); > - assert(_status == DcacheWaitResponse); > - _status = Running; > > numCycles += tickToCycles(curTick - previousTick); > previousTick = curTick; > + > + if (pkt->senderState) { > + SplitSenderState * sendState = > + dynamic_cast<SplitSenderState *>(pkt->senderState); > + assert(sendState); > + pkt->senderState = NULL; > + PacketPtr secondPkt = sendState->secondPkt; > + PacketPtr bigPkt = sendState->bigPkt; > + if (secondPkt) { > + // There's more data to get. > + secondPkt->senderState = sendState; > + sendState->secondPkt = NULL; > + assert(pkt->getSize() < bigPkt->getSize()); > + if (bigPkt->isRead()) { > + memcpy(bigPkt->getPtr<uint8_t>(), > + pkt->getPtr<uint8_t>(), > + pkt->getSize()); > + delete pkt->req; > + delete pkt; > + handleReadPacket(secondPkt); > + } else { > + dcache_pkt = secondPkt; > + handleWritePacket(); > + } > + return; > + } else { > + // This is it. Lets finish this up. > + assert(bigPkt->getSize() > pkt->getSize()); > + Addr offset = bigPkt->getSize() - pkt->getSize(); > + if (bigPkt->isRead()) { > + memcpy(bigPkt->getPtr<uint8_t>() + offset, > + pkt->getPtr<uint8_t>(), > + pkt->getSize()); > + } > + delete pkt->req; > + delete pkt; > + delete sendState; > + pkt = bigPkt; > + } > + } > + > + assert(_status == DcacheWaitResponse); > + _status = Running; > > Fault fault = curStaticInst->completeAcc(pkt, this, traceData); > > @@ -787,10 +993,11 @@ > // delay processing of returned data until next CPU clock edge > Tick next_tick = cpu->nextCycle(curTick); > > - if (next_tick == curTick) > + if (next_tick == curTick) { > cpu->completeDataAccess(pkt); > - else > + } else { > tickEvent.schedule(pkt, next_tick); > + } > > return true; > } > diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh > --- a/src/cpu/simple/timing.hh > +++ b/src/cpu/simple/timing.hh > @@ -48,6 +48,17 @@ > Event *drainEvent; > > private: > + > + class SplitSenderState : public Packet::SenderState > + { > + public: > + PacketPtr bigPkt; > + PacketPtr secondPkt; > + }; > + > + void handleReadPacket(PacketPtr pkt); > + // This function always implicitly uses dcache_pkt. > + void handleWritePacket(); > > class CpuPort : public Port > { > _______________________________________________ > m5-dev mailing list > m5-dev@m5sim.org > http://m5sim.org/mailman/listinfo/m5-dev > > _______________________________________________ m5-dev mailing list m5-dev@m5sim.org http://m5sim.org/mailman/listinfo/m5-dev