Re: [m5-dev] [PATCH 3 of 3] CPU: Make unaligned accesses work in the timing simple CPU

nathan binkert Sun, 09 Nov 2008 09:59:26 -0800

Awesome.  My concern is that the read/write functions are getting
long and hard to read.  Is there anything that makes sense to break
out into a second function (i.e. the actual unaligned handling part)?
Maybe a readUnaligned and writeUnaligned function, with just a simple
check in read/write to see if it needs to call the other?


  Nate

On Sat, Nov 8, 2008 at 12:08 AM,  <[EMAIL PROTECTED]> wrote:
> # HG changeset patch
> # User Gabe Black <[EMAIL PROTECTED]>
> # Date 1226044295 28800
> # Node ID d562cd16ff32bbdb73a5f571e40496a039092e52
> # Parent  2e61b60e6614e026ba055946a45c5f577d8d8ff8
> CPU: Make unaligned accesses work in the timing simple CPU.
>
> diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
> --- a/src/cpu/simple/timing.cc
> +++ b/src/cpu/simple/timing.cc
> @@ -241,57 +241,130 @@
>     _status = Idle;
>  }
>
> +void
> +TimingSimpleCPU::handleReadPacket(PacketPtr pkt)
> +{
> +    RequestPtr req = pkt->req;
> +    if (req->isMmapedIpr()) {
> +        Tick delay;
> +        delay = TheISA::handleIprRead(thread->getTC(), pkt);
> +        new IprEvent(pkt, this, nextCycle(curTick + delay));
> +        _status = DcacheWaitResponse;
> +        dcache_pkt = NULL;
> +    } else if (!dcachePort.sendTiming(pkt)) {
> +        _status = DcacheRetry;
> +        dcache_pkt = pkt;
> +    } else {
> +        _status = DcacheWaitResponse;
> +        // memory system takes ownership of packet
> +        dcache_pkt = NULL;
> +    }
> +}
>
>  template <class T>
>  Fault
>  TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
>  {
> -    Request *req =
> -        new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
> -                    _cpuId, /* thread ID */ 0);
> +    Fault fault;
> +    const int asid = 0;
> +    const int threadId = 0;
> +    const Addr pc = thread->readPC();
>
> -    if (traceData) {
> -        traceData->setAddr(req->getVaddr());
> +    PacketPtr pkt;
> +    RequestPtr req;
> +
> +    int blockSize = dcachePort.peerBlockSize();
> +    int dataSize = sizeof(T);
> +
> +    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
> +
> +    if (secondAddr > addr) {
> +        Addr firstSize = secondAddr - addr;
> +        Addr secondSize = dataSize - firstSize;
> +        // Make sure we'll only need two accesses.
> +        assert(roundDown(secondAddr + secondSize - 1, blockSize) ==
> +                secondAddr);
> +
> +        /*
> +         * Do the translations. If something isn't going to work, find out
> +         * before we waste time setting up anything else.
> +         */
> +        req = new Request(asid, addr, firstSize, flags, pc, _cpuId, 
> threadId);
> +        fault = thread->translateDataReadReq(req);
> +        if (fault != NoFault) {
> +            delete req;
> +            return fault;
> +        }
> +        Request *secondReq =
> +            new Request(asid, secondAddr, secondSize,
> +                        flags, pc, _cpuId, threadId);
> +        fault = thread->translateDataReadReq(secondReq);
> +        if (fault != NoFault) {
> +            delete req;
> +            delete secondReq;
> +            return fault;
> +        }
> +
> +        // This is the packet we'll process now.
> +        pkt = new Packet(req,
> +                         (req->isLocked() ?
> +                          MemCmd::LoadLockedReq : MemCmd::ReadReq),
> +                         Packet::Broadcast);
> +        pkt->dataDynamic<uint8_t>(new uint8_t[firstSize]);
> +        SplitSenderState *sendState = new SplitSenderState;
> +        pkt->senderState = sendState;
> +
> +        // This is the second half of the access we'll deal with later.
> +        PacketPtr secondPkt =
> +            new Packet(secondReq,
> +                       (secondReq->isLocked() ?
> +                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
> +                       Packet::Broadcast);
> +        secondPkt->dataDynamic<uint8_t>(new uint8_t[secondSize]);
> +        sendState->secondPkt = secondPkt;
> +
> +        /*
> +         * This is the big packet that will hold the data we've gotten so 
> far,
> +         * if any, and also act as the response we actually give to the
> +         * instruction.
> +         */
> +        Request *origReq =
> +            new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
> +        origReq->setPhys(req->getPaddr(), dataSize, flags);
> +        PacketPtr bigPkt =
> +            new Packet(origReq, MemCmd::ReadResp, Packet::Broadcast);
> +        bigPkt->dataDynamic<T>(new T);
> +        sendState->bigPkt = bigPkt;
> +    } else {
> +        req = new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
> +
> +        // translate to physical address
> +        Fault fault = thread->translateDataReadReq(req);
> +
> +        if (fault != NoFault) {
> +            delete req;
> +            return fault;
> +        }
> +
> +        pkt = new Packet(req,
> +                         (req->isLocked() ?
> +                          MemCmd::LoadLockedReq : MemCmd::ReadReq),
> +                          Packet::Broadcast);
> +        pkt->dataDynamic<T>(new T);
>     }
>
> -   // translate to physical address
> -    Fault fault = thread->translateDataReadReq(req);
> -
> -    // Now do the access.
> -    if (fault == NoFault) {
> -        PacketPtr pkt =
> -            new Packet(req,
> -                       (req->isLocked() ?
> -                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
> -                       Packet::Broadcast);
> -        pkt->dataDynamic<T>(new T);
> -
> -        if (req->isMmapedIpr()) {
> -            Tick delay;
> -            delay = TheISA::handleIprRead(thread->getTC(), pkt);
> -            new IprEvent(pkt, this, nextCycle(curTick + delay));
> -            _status = DcacheWaitResponse;
> -            dcache_pkt = NULL;
> -        } else if (!dcachePort.sendTiming(pkt)) {
> -            _status = DcacheRetry;
> -            dcache_pkt = pkt;
> -        } else {
> -            _status = DcacheWaitResponse;
> -            // memory system takes ownership of packet
> -            dcache_pkt = NULL;
> -        }
> -
> -        // This will need a new way to tell if it has a dcache attached.
> -        if (req->isUncacheable())
> -            recordEvent("Uncached Read");
> -    } else {
> -        delete req;
> -    }
> +    handleReadPacket(pkt);
>
>     if (traceData) {
>         traceData->setData(data);
> +        traceData->setAddr(addr);
>     }
> -    return fault;
> +
> +    // This will need a new way to tell if it has a dcache attached.
> +    if (req->isUncacheable())
> +        recordEvent("Uncached Read");
> +
> +    return NoFault;
>  }
>
>  Fault
> @@ -364,26 +437,129 @@
>     return read(addr, (uint32_t&)data, flags);
>  }
>
> +void
> +TimingSimpleCPU::handleWritePacket()
> +{
> +    RequestPtr req = dcache_pkt->req;
> +    if (req->isMmapedIpr()) {
> +        Tick delay;
> +        delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
> +        new IprEvent(dcache_pkt, this, nextCycle(curTick + delay));
> +        _status = DcacheWaitResponse;
> +        dcache_pkt = NULL;
> +    } else if (!dcachePort.sendTiming(dcache_pkt)) {
> +        _status = DcacheRetry;
> +    } else {
> +        _status = DcacheWaitResponse;
> +        // memory system takes ownership of packet
> +        dcache_pkt = NULL;
> +    }
> +}
>
>  template <class T>
>  Fault
>  TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
>  {
> -    Request *req =
> -        new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
> -                    _cpuId, /* thread ID */ 0);
> +    const int asid = 0;
> +    const int threadId = 0;
> +    bool do_access = true;  // flag to suppress cache access
> +    const Addr pc = thread->readPC();
>
> -    if (traceData) {
> -        traceData->setAddr(req->getVaddr());
> -    }
> +    RequestPtr req;
>
> -    // translate to physical address
> -    Fault fault = thread->translateDataWriteReq(req);
> +    int blockSize = dcachePort.peerBlockSize();
> +    int dataSize = sizeof(T);
>
> -    // Now do the access.
> -    if (fault == NoFault) {
> +    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
> +
> +    if (secondAddr > addr) {
> +        Fault fault;
> +        Addr firstSize = secondAddr - addr;
> +        Addr secondSize = dataSize - firstSize;
> +        // Make sure we'll only need two accesses.
> +        assert(roundDown(secondAddr + secondSize - 1, blockSize) ==
> +                secondAddr);
> +
> +        req = new Request(asid, addr, firstSize, flags, pc, _cpuId, 
> threadId);
> +        fault = thread->translateDataWriteReq(req);
> +        if (fault != NoFault) {
> +            delete req;
> +            return fault;
> +        }
> +        RequestPtr secondReq = new Request(asid, secondAddr, secondSize,
> +                                           flags, pc, _cpuId, threadId);
> +        fault = thread->translateDataWriteReq(secondReq);
> +        if (fault != NoFault) {
> +            delete req;
> +            delete secondReq;
> +            return fault;
> +        }
> +
>         MemCmd cmd = MemCmd::WriteReq; // default
> -        bool do_access = true;  // flag to suppress cache access
> +        if (req->isLocked()) {
> +            cmd = MemCmd::StoreCondReq;
> +            do_access = TheISA::handleLockedWrite(thread, req);
> +        } else if (req->isSwap()) {
> +            panic("Conditional swaps can't be split.");
> +        }
> +
> +        MemCmd secondCmd = MemCmd::WriteReq; // default
> +        bool secondDoAccess = do_access;
> +        if (secondReq->isLocked()) {
> +            secondCmd = MemCmd::StoreCondReq;
> +            secondDoAccess = TheISA::handleLockedWrite(thread, secondReq);
> +        } else if (req->isSwap()) {
> +            panic("Conditional swaps can't be split.");
> +        }
> +
> +        if (do_access != secondDoAccess) {
> +            panic("Both parts of a split store conditional"
> +                  " must succeed or fail.\n");
> +        }
> +
> +        uint8_t *dataPtr;
> +
> +        assert(dcache_pkt == NULL);
> +        // This is the packet we'll process now.
> +        dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
> +        dataPtr = new uint8_t[firstSize];
> +        memcpy(dataPtr, &data, firstSize);
> +        dcache_pkt->dataDynamic(dataPtr);
> +        SplitSenderState *sendState = new SplitSenderState;
> +        dcache_pkt->senderState = sendState;
> +
> +        // This is the second half of the access we'll deal with later.
> +        PacketPtr secondPkt =
> +            new Packet(secondReq, secondCmd, Packet::Broadcast);
> +        dataPtr = new uint8_t[secondSize];
> +        memcpy(dataPtr, ((uint8_t *)&data) + firstSize, secondSize);
> +        secondPkt->dataDynamic<uint8_t>(dataPtr);
> +        sendState->secondPkt = secondPkt;
> +
> +        /*
> +         * This is the big packet that will hold the data we've gotten so 
> far,
> +         * if any, and also act as the response we actually give to the
> +         * instruction.
> +         */
> +        RequestPtr origReq =
> +            new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
> +        origReq->setPhys(req->getPaddr(), dataSize, flags);
> +        PacketPtr bigPkt =
> +            new Packet(origReq, MemCmd::WriteResp, Packet::Broadcast);
> +        sendState->bigPkt = bigPkt;
> +        bigPkt->allocate();
> +        bigPkt->set(data);
> +    } else {
> +        req = new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
> +
> +        // translate to physical address
> +        Fault fault = thread->translateDataWriteReq(req);
> +        if (fault != NoFault) {
> +            delete req;
> +            return fault;
> +        }
> +
> +        MemCmd cmd = MemCmd::WriteReq; // default
>
>         if (req->isLocked()) {
>             cmd = MemCmd::StoreCondReq;
> @@ -401,38 +577,27 @@
>         assert(dcache_pkt == NULL);
>         dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
>         dcache_pkt->allocate();
> -        dcache_pkt->set(data);
> -
> -        if (do_access) {
> -            if (req->isMmapedIpr()) {
> -                Tick delay;
> -                dcache_pkt->set(htog(data));
> -                delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
> -                new IprEvent(dcache_pkt, this, nextCycle(curTick + delay));
> -                _status = DcacheWaitResponse;
> -                dcache_pkt = NULL;
> -            } else if (!dcachePort.sendTiming(dcache_pkt)) {
> -                _status = DcacheRetry;
> -            } else {
> -                _status = DcacheWaitResponse;
> -                // memory system takes ownership of packet
> -                dcache_pkt = NULL;
> -            }
> -        }
> -        // This will need a new way to tell if it's hooked up to a cache or 
> not.
> -        if (req->isUncacheable())
> -            recordEvent("Uncached Write");
> -    } else {
> -        delete req;
> +        if (req->isMmapedIpr())
> +            dcache_pkt->set(htog(data));
> +        else
> +            dcache_pkt->set(data);
>     }
>
> +    if (do_access)
> +        handleWritePacket();
> +
>     if (traceData) {
> +        traceData->setAddr(req->getVaddr());
>         traceData->setData(data);
>     }
>
> +    // This will need a new way to tell if it's hooked up to a cache or not.
> +    if (req->isUncacheable())
> +        recordEvent("Uncached Write");
> +
>     // If the write needs to have a fault on the access, consider calling
>     // changeStatus() and changing it to "bad addr write" or something.
> -    return fault;
> +    return NoFault;
>  }
>
>  Fault
> @@ -721,11 +886,52 @@
>     // received a response from the dcache: complete the load or store
>     // instruction
>     assert(!pkt->isError());
> -    assert(_status == DcacheWaitResponse);
> -    _status = Running;
>
>     numCycles += tickToCycles(curTick - previousTick);
>     previousTick = curTick;
> +
> +    if (pkt->senderState) {
> +        SplitSenderState * sendState =
> +            dynamic_cast<SplitSenderState *>(pkt->senderState);
> +        assert(sendState);
> +        pkt->senderState = NULL;
> +        PacketPtr secondPkt = sendState->secondPkt;
> +        PacketPtr bigPkt = sendState->bigPkt;
> +        if (secondPkt) {
> +            // There's more data to get.
> +            secondPkt->senderState = sendState;
> +            sendState->secondPkt = NULL;
> +            assert(pkt->getSize() < bigPkt->getSize());
> +            if (bigPkt->isRead()) {
> +                memcpy(bigPkt->getPtr<uint8_t>(),
> +                       pkt->getPtr<uint8_t>(),
> +                       pkt->getSize());
> +                delete pkt->req;
> +                delete pkt;
> +                handleReadPacket(secondPkt);
> +            } else {
> +                dcache_pkt = secondPkt;
> +                handleWritePacket();
> +            }
> +            return;
> +        } else {
> +            // This is it. Lets finish this up.
> +            assert(bigPkt->getSize() > pkt->getSize());
> +            Addr offset = bigPkt->getSize() - pkt->getSize();
> +            if (bigPkt->isRead()) {
> +                memcpy(bigPkt->getPtr<uint8_t>() + offset,
> +                       pkt->getPtr<uint8_t>(),
> +                       pkt->getSize());
> +            }
> +            delete pkt->req;
> +            delete pkt;
> +            delete sendState;
> +            pkt = bigPkt;
> +        }
> +    }
> +
> +    assert(_status == DcacheWaitResponse);
> +    _status = Running;
>
>     Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
>
> @@ -787,10 +993,11 @@
>         // delay processing of returned data until next CPU clock edge
>         Tick next_tick = cpu->nextCycle(curTick);
>
> -        if (next_tick == curTick)
> +        if (next_tick == curTick) {
>             cpu->completeDataAccess(pkt);
> -        else
> +        } else {
>             tickEvent.schedule(pkt, next_tick);
> +        }
>
>         return true;
>     }
> diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
> --- a/src/cpu/simple/timing.hh
> +++ b/src/cpu/simple/timing.hh
> @@ -48,6 +48,17 @@
>     Event *drainEvent;
>
>   private:
> +
> +    class SplitSenderState : public Packet::SenderState
> +    {
> +      public:
> +        PacketPtr bigPkt;
> +        PacketPtr secondPkt;
> +    };
> +
> +    void handleReadPacket(PacketPtr pkt);
> +    // This function always implicitly uses dcache_pkt.
> +    void handleWritePacket();
>
>     class CpuPort : public Port
>     {
> _______________________________________________
> m5-dev mailing list
> m5-dev@m5sim.org
> http://m5sim.org/mailman/listinfo/m5-dev
>
>
_______________________________________________
m5-dev mailing list
m5-dev@m5sim.org
http://m5sim.org/mailman/listinfo/m5-dev

Re: [m5-dev] [PATCH 3 of 3] CPU: Make unaligned accesses work in the timing simple CPU

Reply via email to