Re: [m5-dev] [PATCH 3 of 3] CPU: Make unaligned accesses work in the timing simple CPU

Steve Reinhardt Sat, 08 Nov 2008 20:25:49 -0800

Thanks, Gabe.  It's cool to have this working.  Just a few minor things:

- It looks like the two requests are issued to the cache
sequentially... did you consider issuing them concurrently?  I don't
see a reason why this wouldn't work, though it would be a little more
complex (since the responses could come back in either order).  I'm
not sure myself which one is preferable, or if it even matters that
much.


- Instead of allocating new dynamic memory blocks for the two sub
packets, why not just give them pointers into the larger block?  Then
you wouldn't have to do any merging after the responses come back for
loads, or copying for writes.

- It's still the official policy that local variables are lower
w/underscores while class members are mixed case... sometimes I have
mixed feelings about that one myself but I've been trying to do a
better job of sticking with it.

- I know we discussed this before, but given that we're not attempting
to handle split swaps, does it make sense to pretend we're handling
split ll/sc operations?

- Would it be possible to condense the code a bit?  I think doing
things a little more lockstep with the two requests would help; also
I'd think you can assume that both requests end up being the same in
terms of locked/unlocked etc.  Something like this:

Req *first_req = Request(...);
Req *second_req = Request(...);
fault = thread->translate(first_req);
if (fault == NoFault) {
    fault = thread->translate(second_req);
}
if (fault != NoFault) {
    delete first_req;
    delete second_req;
    return fault;
}

if (first_req->isSwap() || second_req->isSwap()) {
   panic(...);
}

etc.

Steve

On Sat, Nov 8, 2008 at 12:08 AM,  <[EMAIL PROTECTED]> wrote:
> # HG changeset patch
> # User Gabe Black <[EMAIL PROTECTED]>
> # Date 1226044295 28800
> # Node ID d562cd16ff32bbdb73a5f571e40496a039092e52
> # Parent  2e61b60e6614e026ba055946a45c5f577d8d8ff8
> CPU: Make unaligned accesses work in the timing simple CPU.
>
> diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
> --- a/src/cpu/simple/timing.cc
> +++ b/src/cpu/simple/timing.cc
> @@ -241,57 +241,130 @@
>     _status = Idle;
>  }
>
> +void
> +TimingSimpleCPU::handleReadPacket(PacketPtr pkt)
> +{
> +    RequestPtr req = pkt->req;
> +    if (req->isMmapedIpr()) {
> +        Tick delay;
> +        delay = TheISA::handleIprRead(thread->getTC(), pkt);
> +        new IprEvent(pkt, this, nextCycle(curTick + delay));
> +        _status = DcacheWaitResponse;
> +        dcache_pkt = NULL;
> +    } else if (!dcachePort.sendTiming(pkt)) {
> +        _status = DcacheRetry;
> +        dcache_pkt = pkt;
> +    } else {
> +        _status = DcacheWaitResponse;
> +        // memory system takes ownership of packet
> +        dcache_pkt = NULL;
> +    }
> +}
>
>  template <class T>
>  Fault
>  TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
>  {
> -    Request *req =
> -        new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
> -                    _cpuId, /* thread ID */ 0);
> +    Fault fault;
> +    const int asid = 0;
> +    const int threadId = 0;
> +    const Addr pc = thread->readPC();
>
> -    if (traceData) {
> -        traceData->setAddr(req->getVaddr());
> +    PacketPtr pkt;
> +    RequestPtr req;
> +
> +    int blockSize = dcachePort.peerBlockSize();
> +    int dataSize = sizeof(T);
> +
> +    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
> +
> +    if (secondAddr > addr) {
> +        Addr firstSize = secondAddr - addr;
> +        Addr secondSize = dataSize - firstSize;
> +        // Make sure we'll only need two accesses.
> +        assert(roundDown(secondAddr + secondSize - 1, blockSize) ==
> +                secondAddr);
> +
> +        /*
> +         * Do the translations. If something isn't going to work, find out
> +         * before we waste time setting up anything else.
> +         */
> +        req = new Request(asid, addr, firstSize, flags, pc, _cpuId, 
> threadId);
> +        fault = thread->translateDataReadReq(req);
> +        if (fault != NoFault) {
> +            delete req;
> +            return fault;
> +        }
> +        Request *secondReq =
> +            new Request(asid, secondAddr, secondSize,
> +                        flags, pc, _cpuId, threadId);
> +        fault = thread->translateDataReadReq(secondReq);
> +        if (fault != NoFault) {
> +            delete req;
> +            delete secondReq;
> +            return fault;
> +        }
> +
> +        // This is the packet we'll process now.
> +        pkt = new Packet(req,
> +                         (req->isLocked() ?
> +                          MemCmd::LoadLockedReq : MemCmd::ReadReq),
> +                         Packet::Broadcast);
> +        pkt->dataDynamic<uint8_t>(new uint8_t[firstSize]);
> +        SplitSenderState *sendState = new SplitSenderState;
> +        pkt->senderState = sendState;
> +
> +        // This is the second half of the access we'll deal with later.
> +        PacketPtr secondPkt =
> +            new Packet(secondReq,
> +                       (secondReq->isLocked() ?
> +                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
> +                       Packet::Broadcast);
> +        secondPkt->dataDynamic<uint8_t>(new uint8_t[secondSize]);
> +        sendState->secondPkt = secondPkt;
> +
> +        /*
> +         * This is the big packet that will hold the data we've gotten so 
> far,
> +         * if any, and also act as the response we actually give to the
> +         * instruction.
> +         */
> +        Request *origReq =
> +            new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
> +        origReq->setPhys(req->getPaddr(), dataSize, flags);
> +        PacketPtr bigPkt =
> +            new Packet(origReq, MemCmd::ReadResp, Packet::Broadcast);
> +        bigPkt->dataDynamic<T>(new T);
> +        sendState->bigPkt = bigPkt;
> +    } else {
> +        req = new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
> +
> +        // translate to physical address
> +        Fault fault = thread->translateDataReadReq(req);
> +
> +        if (fault != NoFault) {
> +            delete req;
> +            return fault;
> +        }
> +
> +        pkt = new Packet(req,
> +                         (req->isLocked() ?
> +                          MemCmd::LoadLockedReq : MemCmd::ReadReq),
> +                          Packet::Broadcast);
> +        pkt->dataDynamic<T>(new T);
>     }
>
> -   // translate to physical address
> -    Fault fault = thread->translateDataReadReq(req);
> -
> -    // Now do the access.
> -    if (fault == NoFault) {
> -        PacketPtr pkt =
> -            new Packet(req,
> -                       (req->isLocked() ?
> -                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
> -                       Packet::Broadcast);
> -        pkt->dataDynamic<T>(new T);
> -
> -        if (req->isMmapedIpr()) {
> -            Tick delay;
> -            delay = TheISA::handleIprRead(thread->getTC(), pkt);
> -            new IprEvent(pkt, this, nextCycle(curTick + delay));
> -            _status = DcacheWaitResponse;
> -            dcache_pkt = NULL;
> -        } else if (!dcachePort.sendTiming(pkt)) {
> -            _status = DcacheRetry;
> -            dcache_pkt = pkt;
> -        } else {
> -            _status = DcacheWaitResponse;
> -            // memory system takes ownership of packet
> -            dcache_pkt = NULL;
> -        }
> -
> -        // This will need a new way to tell if it has a dcache attached.
> -        if (req->isUncacheable())
> -            recordEvent("Uncached Read");
> -    } else {
> -        delete req;
> -    }
> +    handleReadPacket(pkt);
>
>     if (traceData) {
>         traceData->setData(data);
> +        traceData->setAddr(addr);
>     }
> -    return fault;
> +
> +    // This will need a new way to tell if it has a dcache attached.
> +    if (req->isUncacheable())
> +        recordEvent("Uncached Read");
> +
> +    return NoFault;
>  }
>
>  Fault
> @@ -364,26 +437,129 @@
>     return read(addr, (uint32_t&)data, flags);
>  }
>
> +void
> +TimingSimpleCPU::handleWritePacket()
> +{
> +    RequestPtr req = dcache_pkt->req;
> +    if (req->isMmapedIpr()) {
> +        Tick delay;
> +        delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
> +        new IprEvent(dcache_pkt, this, nextCycle(curTick + delay));
> +        _status = DcacheWaitResponse;
> +        dcache_pkt = NULL;
> +    } else if (!dcachePort.sendTiming(dcache_pkt)) {
> +        _status = DcacheRetry;
> +    } else {
> +        _status = DcacheWaitResponse;
> +        // memory system takes ownership of packet
> +        dcache_pkt = NULL;
> +    }
> +}
>
>  template <class T>
>  Fault
>  TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
>  {
> -    Request *req =
> -        new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
> -                    _cpuId, /* thread ID */ 0);
> +    const int asid = 0;
> +    const int threadId = 0;
> +    bool do_access = true;  // flag to suppress cache access
> +    const Addr pc = thread->readPC();
>
> -    if (traceData) {
> -        traceData->setAddr(req->getVaddr());
> -    }
> +    RequestPtr req;
>
> -    // translate to physical address
> -    Fault fault = thread->translateDataWriteReq(req);
> +    int blockSize = dcachePort.peerBlockSize();
> +    int dataSize = sizeof(T);
>
> -    // Now do the access.
> -    if (fault == NoFault) {
> +    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
> +
> +    if (secondAddr > addr) {
> +        Fault fault;
> +        Addr firstSize = secondAddr - addr;
> +        Addr secondSize = dataSize - firstSize;
> +        // Make sure we'll only need two accesses.
> +        assert(roundDown(secondAddr + secondSize - 1, blockSize) ==
> +                secondAddr);
> +
> +        req = new Request(asid, addr, firstSize, flags, pc, _cpuId, 
> threadId);
> +        fault = thread->translateDataWriteReq(req);
> +        if (fault != NoFault) {
> +            delete req;
> +            return fault;
> +        }
> +        RequestPtr secondReq = new Request(asid, secondAddr, secondSize,
> +                                           flags, pc, _cpuId, threadId);
> +        fault = thread->translateDataWriteReq(secondReq);
> +        if (fault != NoFault) {
> +            delete req;
> +            delete secondReq;
> +            return fault;
> +        }
> +
>         MemCmd cmd = MemCmd::WriteReq; // default
> -        bool do_access = true;  // flag to suppress cache access
> +        if (req->isLocked()) {
> +            cmd = MemCmd::StoreCondReq;
> +            do_access = TheISA::handleLockedWrite(thread, req);
> +        } else if (req->isSwap()) {
> +            panic("Conditional swaps can't be split.");
> +        }
> +
> +        MemCmd secondCmd = MemCmd::WriteReq; // default
> +        bool secondDoAccess = do_access;
> +        if (secondReq->isLocked()) {
> +            secondCmd = MemCmd::StoreCondReq;
> +            secondDoAccess = TheISA::handleLockedWrite(thread, secondReq);
> +        } else if (req->isSwap()) {
> +            panic("Conditional swaps can't be split.");
> +        }
> +
> +        if (do_access != secondDoAccess) {
> +            panic("Both parts of a split store conditional"
> +                  " must succeed or fail.\n");
> +        }
> +
> +        uint8_t *dataPtr;
> +
> +        assert(dcache_pkt == NULL);
> +        // This is the packet we'll process now.
> +        dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
> +        dataPtr = new uint8_t[firstSize];
> +        memcpy(dataPtr, &data, firstSize);
> +        dcache_pkt->dataDynamic(dataPtr);
> +        SplitSenderState *sendState = new SplitSenderState;
> +        dcache_pkt->senderState = sendState;
> +
> +        // This is the second half of the access we'll deal with later.
> +        PacketPtr secondPkt =
> +            new Packet(secondReq, secondCmd, Packet::Broadcast);
> +        dataPtr = new uint8_t[secondSize];
> +        memcpy(dataPtr, ((uint8_t *)&data) + firstSize, secondSize);
> +        secondPkt->dataDynamic<uint8_t>(dataPtr);
> +        sendState->secondPkt = secondPkt;
> +
> +        /*
> +         * This is the big packet that will hold the data we've gotten so 
> far,
> +         * if any, and also act as the response we actually give to the
> +         * instruction.
> +         */
> +        RequestPtr origReq =
> +            new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
> +        origReq->setPhys(req->getPaddr(), dataSize, flags);
> +        PacketPtr bigPkt =
> +            new Packet(origReq, MemCmd::WriteResp, Packet::Broadcast);
> +        sendState->bigPkt = bigPkt;
> +        bigPkt->allocate();
> +        bigPkt->set(data);
> +    } else {
> +        req = new Request(asid, addr, dataSize, flags, pc, _cpuId, threadId);
> +
> +        // translate to physical address
> +        Fault fault = thread->translateDataWriteReq(req);
> +        if (fault != NoFault) {
> +            delete req;
> +            return fault;
> +        }
> +
> +        MemCmd cmd = MemCmd::WriteReq; // default
>
>         if (req->isLocked()) {
>             cmd = MemCmd::StoreCondReq;
> @@ -401,38 +577,27 @@
>         assert(dcache_pkt == NULL);
>         dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
>         dcache_pkt->allocate();
> -        dcache_pkt->set(data);
> -
> -        if (do_access) {
> -            if (req->isMmapedIpr()) {
> -                Tick delay;
> -                dcache_pkt->set(htog(data));
> -                delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
> -                new IprEvent(dcache_pkt, this, nextCycle(curTick + delay));
> -                _status = DcacheWaitResponse;
> -                dcache_pkt = NULL;
> -            } else if (!dcachePort.sendTiming(dcache_pkt)) {
> -                _status = DcacheRetry;
> -            } else {
> -                _status = DcacheWaitResponse;
> -                // memory system takes ownership of packet
> -                dcache_pkt = NULL;
> -            }
> -        }
> -        // This will need a new way to tell if it's hooked up to a cache or 
> not.
> -        if (req->isUncacheable())
> -            recordEvent("Uncached Write");
> -    } else {
> -        delete req;
> +        if (req->isMmapedIpr())
> +            dcache_pkt->set(htog(data));
> +        else
> +            dcache_pkt->set(data);
>     }
>
> +    if (do_access)
> +        handleWritePacket();
> +
>     if (traceData) {
> +        traceData->setAddr(req->getVaddr());
>         traceData->setData(data);
>     }
>
> +    // This will need a new way to tell if it's hooked up to a cache or not.
> +    if (req->isUncacheable())
> +        recordEvent("Uncached Write");
> +
>     // If the write needs to have a fault on the access, consider calling
>     // changeStatus() and changing it to "bad addr write" or something.
> -    return fault;
> +    return NoFault;
>  }
>
>  Fault
> @@ -721,11 +886,52 @@
>     // received a response from the dcache: complete the load or store
>     // instruction
>     assert(!pkt->isError());
> -    assert(_status == DcacheWaitResponse);
> -    _status = Running;
>
>     numCycles += tickToCycles(curTick - previousTick);
>     previousTick = curTick;
> +
> +    if (pkt->senderState) {
> +        SplitSenderState * sendState =
> +            dynamic_cast<SplitSenderState *>(pkt->senderState);
> +        assert(sendState);
> +        pkt->senderState = NULL;
> +        PacketPtr secondPkt = sendState->secondPkt;
> +        PacketPtr bigPkt = sendState->bigPkt;
> +        if (secondPkt) {
> +            // There's more data to get.
> +            secondPkt->senderState = sendState;
> +            sendState->secondPkt = NULL;
> +            assert(pkt->getSize() < bigPkt->getSize());
> +            if (bigPkt->isRead()) {
> +                memcpy(bigPkt->getPtr<uint8_t>(),
> +                       pkt->getPtr<uint8_t>(),
> +                       pkt->getSize());
> +                delete pkt->req;
> +                delete pkt;
> +                handleReadPacket(secondPkt);
> +            } else {
> +                dcache_pkt = secondPkt;
> +                handleWritePacket();
> +            }
> +            return;
> +        } else {
> +            // This is it. Lets finish this up.
> +            assert(bigPkt->getSize() > pkt->getSize());
> +            Addr offset = bigPkt->getSize() - pkt->getSize();
> +            if (bigPkt->isRead()) {
> +                memcpy(bigPkt->getPtr<uint8_t>() + offset,
> +                       pkt->getPtr<uint8_t>(),
> +                       pkt->getSize());
> +            }
> +            delete pkt->req;
> +            delete pkt;
> +            delete sendState;
> +            pkt = bigPkt;
> +        }
> +    }
> +
> +    assert(_status == DcacheWaitResponse);
> +    _status = Running;
>
>     Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
>
> @@ -787,10 +993,11 @@
>         // delay processing of returned data until next CPU clock edge
>         Tick next_tick = cpu->nextCycle(curTick);
>
> -        if (next_tick == curTick)
> +        if (next_tick == curTick) {
>             cpu->completeDataAccess(pkt);
> -        else
> +        } else {
>             tickEvent.schedule(pkt, next_tick);
> +        }
>
>         return true;
>     }
> diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
> --- a/src/cpu/simple/timing.hh
> +++ b/src/cpu/simple/timing.hh
> @@ -48,6 +48,17 @@
>     Event *drainEvent;
>
>   private:
> +
> +    class SplitSenderState : public Packet::SenderState
> +    {
> +      public:
> +        PacketPtr bigPkt;
> +        PacketPtr secondPkt;
> +    };
> +
> +    void handleReadPacket(PacketPtr pkt);
> +    // This function always implicitly uses dcache_pkt.
> +    void handleWritePacket();
>
>     class CpuPort : public Port
>     {
> _______________________________________________
> m5-dev mailing list
> m5-dev@m5sim.org
> http://m5sim.org/mailman/listinfo/m5-dev
>
_______________________________________________
m5-dev mailing list
m5-dev@m5sim.org
http://m5sim.org/mailman/listinfo/m5-dev

Re: [m5-dev] [PATCH 3 of 3] CPU: Make unaligned accesses work in the timing simple CPU

Reply via email to