[m5-dev] [PATCH 3 of 3] CPU: Make unaligned accesses work in the timing simple CPU

gblack Sat, 08 Nov 2008 00:07:42 -0800

# HG changeset patch
# User Gabe Black <[EMAIL PROTECTED]>
# Date 1226044295 28800
# Node ID d562cd16ff32bbdb73a5f571e40496a039092e52
# Parent  2e61b60e6614e026ba055946a45c5f577d8d8ff8
CPU: Make unaligned accesses work in the timing simple CPU.


diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -241,57 +241,130 @@
     _status = Idle;
 }
 
+void
+TimingSimpleCPU::handleReadPacket(PacketPtr pkt)
+{
+    RequestPtr req = pkt->req;
+    if (req->isMmapedIpr()) {
+        Tick delay;
+        delay = TheISA::handleIprRead(thread->getTC(), pkt);
+        new IprEvent(pkt, this, nextCycle(curTick + delay));
+        _status = DcacheWaitResponse;
+        dcache_pkt = NULL;
+    } else if (!dcachePort.sendTiming(pkt)) {
+        _status = DcacheRetry;
+        dcache_pkt = pkt;
+    } else {
+        _status = DcacheWaitResponse;
+        // memory system takes ownership of packet
+        dcache_pkt = NULL;
+    }
+}
 
 template <class T>
 Fault
 TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
 {
-    Request *req =
-        new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
-                    _cpuId, /* thread ID */ 0);
+    Fault fault;
+    const int asid = 0;
+    const int threadId = 0;
+    const Addr pc = thread->readPC();
 
-    if (traceData) {
-        traceData->setAddr(req->getVaddr());
+    PacketPtr pkt;
+    RequestPtr req;
+
+    int blockSize = dcachePort.peerBlockSize();
+    int dataSize = sizeof(T);
+
+    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+
+    if (secondAddr > addr) {
+        Addr firstSize = secondAddr - addr;
+        Addr secondSize = dataSize - firstSize;
+        // Make sure we'll only need two accesses.
+        assert(roundDown(secondAddr + secondSize - 1, blockSize) ==
+                secondAddr);
+
+        /*
+         * Do the translations. If something isn't going to work, find out
+         * before we waste time setting up anything else.
+         */
+        req = new Request(asid, addr, firstSize, flags, pc, _cpuId, threadId);
+        fault = thread->translateDataReadReq(req);
+        if (fault != NoFault) {
+            delete req;
+            return fault;
+        }
+        Request *secondReq =
+            new Request(asid, secondAddr, secondSize,
+                        flags, pc, _cpuId, threadId);
+        fault = thread->translateDataReadReq(secondReq);
+        if (fault != NoFault) {
+            delete req;
+            delete secondReq;
+            return fault;
+        }
+
+        // This is the packet we'll process now.
+        pkt = new Packet(req,
+                         (req->isLocked() ?
+                          MemCmd::LoadLockedReq : MemCmd::ReadReq),
+                         Packet::Broadcast);
+        pkt->dataDynamic<uint8_t>(new uint8_t[firstSize]);
+        SplitSenderState *sendState = new SplitSenderState;
+        pkt->senderState = sendState;
+
+        // This is the second half of the access we'll deal with later.
+        PacketPtr secondPkt =
+            new Packet(secondReq,
+                       (secondReq->isLocked() ?
+                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
+                       Packet::Broadcast);
+        secondPkt->dataDynamic<uint8_t>(new uint8_t[secondSize]);
+        sendState->secondPkt = secondPkt;
+
+        /*
+         * This is the big packet that will hold the data we've gotten so far,
+         * if any, and also act as the response we actually give to the
+         * instruction.
+         */
+        Request *origReq =
+            new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
+        origReq->setPhys(req->getPaddr(), dataSize, flags);
+        PacketPtr bigPkt =
+            new Packet(origReq, MemCmd::ReadResp, Packet::Broadcast);
+        bigPkt->dataDynamic<T>(new T);
+        sendState->bigPkt = bigPkt;
+    } else {
+        req = new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
+
+        // translate to physical address
+        Fault fault = thread->translateDataReadReq(req);
+
+        if (fault != NoFault) {
+            delete req;
+            return fault;
+        }
+
+        pkt = new Packet(req,
+                         (req->isLocked() ?
+                          MemCmd::LoadLockedReq : MemCmd::ReadReq),
+                          Packet::Broadcast);
+        pkt->dataDynamic<T>(new T);
     }
 
-   // translate to physical address
-    Fault fault = thread->translateDataReadReq(req);
-
-    // Now do the access.
-    if (fault == NoFault) {
-        PacketPtr pkt =
-            new Packet(req,
-                       (req->isLocked() ?
-                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
-                       Packet::Broadcast);
-        pkt->dataDynamic<T>(new T);
-
-        if (req->isMmapedIpr()) {
-            Tick delay;
-            delay = TheISA::handleIprRead(thread->getTC(), pkt);
-            new IprEvent(pkt, this, nextCycle(curTick + delay));
-            _status = DcacheWaitResponse;
-            dcache_pkt = NULL;
-        } else if (!dcachePort.sendTiming(pkt)) {
-            _status = DcacheRetry;
-            dcache_pkt = pkt;
-        } else {
-            _status = DcacheWaitResponse;
-            // memory system takes ownership of packet
-            dcache_pkt = NULL;
-        }
-
-        // This will need a new way to tell if it has a dcache attached.
-        if (req->isUncacheable())
-            recordEvent("Uncached Read");
-    } else {
-        delete req;
-    }
+    handleReadPacket(pkt);
 
     if (traceData) {
         traceData->setData(data);
+        traceData->setAddr(addr);
     }
-    return fault;
+
+    // This will need a new way to tell if it has a dcache attached.
+    if (req->isUncacheable())
+        recordEvent("Uncached Read");
+
+    return NoFault;
 }
 
 Fault
@@ -364,26 +437,129 @@
     return read(addr, (uint32_t&)data, flags);
 }
 
+void
+TimingSimpleCPU::handleWritePacket()
+{
+    RequestPtr req = dcache_pkt->req;
+    if (req->isMmapedIpr()) {
+        Tick delay;
+        delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
+        new IprEvent(dcache_pkt, this, nextCycle(curTick + delay));
+        _status = DcacheWaitResponse;
+        dcache_pkt = NULL;
+    } else if (!dcachePort.sendTiming(dcache_pkt)) {
+        _status = DcacheRetry;
+    } else {
+        _status = DcacheWaitResponse;
+        // memory system takes ownership of packet
+        dcache_pkt = NULL;
+    }
+}
 
 template <class T>
 Fault
 TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 {
-    Request *req =
-        new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
-                    _cpuId, /* thread ID */ 0);
+    const int asid = 0;
+    const int threadId = 0;
+    bool do_access = true;  // flag to suppress cache access
+    const Addr pc = thread->readPC();
 
-    if (traceData) {
-        traceData->setAddr(req->getVaddr());
-    }
+    RequestPtr req;
 
-    // translate to physical address
-    Fault fault = thread->translateDataWriteReq(req);
+    int blockSize = dcachePort.peerBlockSize();
+    int dataSize = sizeof(T);
 
-    // Now do the access.
-    if (fault == NoFault) {
+    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+
+    if (secondAddr > addr) {
+        Fault fault;
+        Addr firstSize = secondAddr - addr;
+        Addr secondSize = dataSize - firstSize;
+        // Make sure we'll only need two accesses.
+        assert(roundDown(secondAddr + secondSize - 1, blockSize) == 
+                secondAddr);
+
+        req = new Request(asid, addr, firstSize, flags, pc, _cpuId, threadId);
+        fault = thread->translateDataWriteReq(req);
+        if (fault != NoFault) {
+            delete req;
+            return fault;
+        }
+        RequestPtr secondReq = new Request(asid, secondAddr, secondSize,
+                                           flags, pc, _cpuId, threadId);
+        fault = thread->translateDataWriteReq(secondReq);
+        if (fault != NoFault) {
+            delete req;
+            delete secondReq;
+            return fault;
+        }
+
         MemCmd cmd = MemCmd::WriteReq; // default
-        bool do_access = true;  // flag to suppress cache access
+        if (req->isLocked()) {
+            cmd = MemCmd::StoreCondReq;
+            do_access = TheISA::handleLockedWrite(thread, req);
+        } else if (req->isSwap()) {
+            panic("Conditional swaps can't be split.");
+        }
+
+        MemCmd secondCmd = MemCmd::WriteReq; // default
+        bool secondDoAccess = do_access;
+        if (secondReq->isLocked()) {
+            secondCmd = MemCmd::StoreCondReq;
+            secondDoAccess = TheISA::handleLockedWrite(thread, secondReq);
+        } else if (req->isSwap()) {
+            panic("Conditional swaps can't be split.");
+        }
+
+        if (do_access != secondDoAccess) {
+            panic("Both parts of a split store conditional"
+                  " must succeed or fail.\n");
+        }
+
+        uint8_t *dataPtr;
+
+        assert(dcache_pkt == NULL);
+        // This is the packet we'll process now.
+        dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
+        dataPtr = new uint8_t[firstSize];
+        memcpy(dataPtr, &data, firstSize);
+        dcache_pkt->dataDynamic(dataPtr);
+        SplitSenderState *sendState = new SplitSenderState;
+        dcache_pkt->senderState = sendState;
+
+        // This is the second half of the access we'll deal with later.
+        PacketPtr secondPkt =
+            new Packet(secondReq, secondCmd, Packet::Broadcast);
+        dataPtr = new uint8_t[secondSize];
+        memcpy(dataPtr, ((uint8_t *)&data) + firstSize, secondSize);
+        secondPkt->dataDynamic<uint8_t>(dataPtr);
+        sendState->secondPkt = secondPkt;
+
+        /*
+         * This is the big packet that will hold the data we've gotten so far,
+         * if any, and also act as the response we actually give to the
+         * instruction.
+         */
+        RequestPtr origReq = 
+            new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
+        origReq->setPhys(req->getPaddr(), dataSize, flags);
+        PacketPtr bigPkt =
+            new Packet(origReq, MemCmd::WriteResp, Packet::Broadcast);
+        sendState->bigPkt = bigPkt;
+        bigPkt->allocate();
+        bigPkt->set(data);
+    } else {
+        req = new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
+
+        // translate to physical address
+        Fault fault = thread->translateDataWriteReq(req);
+        if (fault != NoFault) {
+            delete req;
+            return fault;
+        }
+
+        MemCmd cmd = MemCmd::WriteReq; // default
 
         if (req->isLocked()) {
             cmd = MemCmd::StoreCondReq;
@@ -401,38 +577,27 @@
         assert(dcache_pkt == NULL);
         dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
         dcache_pkt->allocate();
-        dcache_pkt->set(data);
-
-        if (do_access) {
-            if (req->isMmapedIpr()) {
-                Tick delay;
-                dcache_pkt->set(htog(data));
-                delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
-                new IprEvent(dcache_pkt, this, nextCycle(curTick + delay));
-                _status = DcacheWaitResponse;
-                dcache_pkt = NULL;
-            } else if (!dcachePort.sendTiming(dcache_pkt)) {
-                _status = DcacheRetry;
-            } else {
-                _status = DcacheWaitResponse;
-                // memory system takes ownership of packet
-                dcache_pkt = NULL;
-            }
-        }
-        // This will need a new way to tell if it's hooked up to a cache or not.
-        if (req->isUncacheable())
-            recordEvent("Uncached Write");
-    } else {
-        delete req;
+        if (req->isMmapedIpr())
+            dcache_pkt->set(htog(data));
+        else
+            dcache_pkt->set(data);
     }
 
+    if (do_access)
+        handleWritePacket();
+
     if (traceData) {
+        traceData->setAddr(req->getVaddr());
         traceData->setData(data);
     }
 
+    // This will need a new way to tell if it's hooked up to a cache or not.
+    if (req->isUncacheable())
+        recordEvent("Uncached Write");
+
     // If the write needs to have a fault on the access, consider calling
     // changeStatus() and changing it to "bad addr write" or something.
-    return fault;
+    return NoFault;
 }
 
 Fault
@@ -721,11 +886,52 @@
     // received a response from the dcache: complete the load or store
     // instruction
     assert(!pkt->isError());
-    assert(_status == DcacheWaitResponse);
-    _status = Running;
 
     numCycles += tickToCycles(curTick - previousTick);
     previousTick = curTick;
+
+    if (pkt->senderState) {
+        SplitSenderState * sendState =
+            dynamic_cast<SplitSenderState *>(pkt->senderState);
+        assert(sendState);
+        pkt->senderState = NULL;
+        PacketPtr secondPkt = sendState->secondPkt;
+        PacketPtr bigPkt = sendState->bigPkt;
+        if (secondPkt) {
+            // There's more data to get.
+            secondPkt->senderState = sendState;
+            sendState->secondPkt = NULL;
+            assert(pkt->getSize() < bigPkt->getSize());
+            if (bigPkt->isRead()) {
+                memcpy(bigPkt->getPtr<uint8_t>(),
+                       pkt->getPtr<uint8_t>(),
+                       pkt->getSize());
+                delete pkt->req;
+                delete pkt;
+                handleReadPacket(secondPkt);
+            } else {
+                dcache_pkt = secondPkt;
+                handleWritePacket();
+            }
+            return;
+        } else {
+            // This is it. Lets finish this up.
+            assert(bigPkt->getSize() > pkt->getSize());
+            Addr offset = bigPkt->getSize() - pkt->getSize();
+            if (bigPkt->isRead()) {
+                memcpy(bigPkt->getPtr<uint8_t>() + offset,
+                       pkt->getPtr<uint8_t>(),
+                       pkt->getSize());
+            }
+            delete pkt->req;
+            delete pkt;
+            delete sendState;
+            pkt = bigPkt;
+        }
+    }
+
+    assert(_status == DcacheWaitResponse);
+    _status = Running;
 
     Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
 
@@ -787,10 +993,11 @@
         // delay processing of returned data until next CPU clock edge
         Tick next_tick = cpu->nextCycle(curTick);
 
-        if (next_tick == curTick)
+        if (next_tick == curTick) {
             cpu->completeDataAccess(pkt);
-        else
+        } else {
             tickEvent.schedule(pkt, next_tick);
+        }
 
         return true;
     }
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -48,6 +48,17 @@
     Event *drainEvent;
 
   private:
+
+    class SplitSenderState : public Packet::SenderState
+    {
+      public:
+        PacketPtr bigPkt;
+        PacketPtr secondPkt;
+    };
+
+    void handleReadPacket(PacketPtr pkt);
+    // This function always implicitly uses dcache_pkt.
+    void handleWritePacket();
 
     class CpuPort : public Port
     {
_______________________________________________
m5-dev mailing list
m5-dev@m5sim.org
http://m5sim.org/mailman/listinfo/m5-dev

[m5-dev] [PATCH 3 of 3] CPU: Make unaligned accesses work in the timing simple CPU

Reply via email to