Thanks Gabe.

I've run all regressions and there are some that fail.  I've checked the  
differences and it's mainly changes in the number of DTB accesses (as  
expected).  In some cases there are more Dcache accesses too (also  
expected if speculative I guess).  In general though, the number of ticks  
stays exactly the same.

Steve, if you could give me an idea of how to incorporate the templating  
into this, then I'll fix that all up too.  I'll hold off committing until  
that's all done.

Cheers
Tim

On Tue, 01 Dec 2009 10:24:43 -0000, Gabe Black <[email protected]>  
wrote:

> Generally this looks like a fairly straightforward port replacement of
> the original code with something essentially equivalent which is fine.
> One issue is the new virtual finishTranslation function. Since
> translation will happen very frequently (at least once a fetch), it's
> best to avoid a virtual function call and the slight performance
> overhead. It's small, but it adds up if you multiply it by several
> billion instructions. Steve can probably tell you the right way to weave
> that into the templating set up of the CPUs (it confuses me some times),
> so I'll leave that to him to comment on. Please let me know if there
> were any other interface/functional changes that I'm missing. I saw the
> setFault and isPrefetch functions, but those are going to be inlined
> anyway they're nothing to worry about.
>
> Since changes to the CPUs can affect all ISAs in potentially subtle,
> obscure ways, please run all of the regressions once before submitting
> if you haven't already. Please also give Steve a chance to look this
> over if possible.
>
> Gabe
>
> Timothy M. Jones wrote:
>> # HG changeset patch
>> # User Timothy M. Jones <[email protected]>
>> # Date 1259064162 0
>> # Node ID 77dffd16710c4ad8e308b7dfee75cce4219b30a0
>> # Parent  b431ed08a6b7d809df9bfed51365e4d3bdf81f93
>> Cpu: Make TimingSimpleCPU use new DTB translation code.
>>
>> This patch removes the duplicate code from cpu/simple/timing.hh and  
>> makes
>> TimingSimpleCPU use the new code in cpu/translation.hh.  I've had to  
>> make
>> a few additional changes to get it to work ok for both O3 and Timing.
>>
>> diff --git a/src/cpu/base.hh b/src/cpu/base.hh
>> --- a/src/cpu/base.hh
>> +++ b/src/cpu/base.hh
>> @@ -53,6 +53,7 @@
>>  class ThreadContext;
>>  class System;
>>  class Port;
>> +class WholeTranslationState;
>>
>>  namespace TheISA
>>  {
>> @@ -276,6 +277,12 @@
>>
>>      virtual Counter totalInstructions() const { return 0; }
>>
>> +    /**
>> +     * Finish a DTB translation.
>> +     * @param state The DTB translation state.
>> +     */
>> +    virtual void finishTranslation(WholeTranslationState *state) {}
>> +
>>      // Function tracing
>>    private:
>>      bool functionTracingEnabled;
>> diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
>> --- a/src/cpu/base_dyn_inst.hh
>> +++ b/src/cpu/base_dyn_inst.hh
>> @@ -979,9 +979,9 @@
>>                                         BaseTLB::Mode mode)
>>  {
>>      WholeTranslationState *state =
>> -        new WholeTranslationState(req, res, mode);
>> -    DataTranslation<Impl> *trans =
>> -        new DataTranslation<Impl>(this, state);
>> +        new WholeTranslationState(req, NULL, res, mode);
>> +    DataTranslationInst<Impl> *trans =
>> +        new DataTranslationInst<Impl>(this, state);
>>      cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
>>  }
>>
>> @@ -993,11 +993,11 @@
>>  {
>>      // Set up the translation state.
>>      WholeTranslationState *state =
>> -        new WholeTranslationState(req, sreqLow, sreqHigh, res, mode);
>> -    DataTranslation<Impl> *stransLow =
>> -        new DataTranslation<Impl>(this, state, 0);
>> -    DataTranslation<Impl> *stransHigh =
>> -        new DataTranslation<Impl>(this, state, 1);
>> +        new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res,  
>> mode);
>> +    DataTranslationInst<Impl> *stransLow =
>> +        new DataTranslationInst<Impl>(this, state, 0);
>> +    DataTranslationInst<Impl> *stransHigh =
>> +        new DataTranslationInst<Impl>(this, state, 1);
>>
>>      // Perform the translation.
>>      cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow,  
>> mode);
>> diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
>> --- a/src/cpu/simple/timing.cc
>> +++ b/src/cpu/simple/timing.cc
>> @@ -268,19 +268,9 @@
>>  }
>>
>>  void
>> -TimingSimpleCPU::sendData(Fault fault, RequestPtr req,
>> -        uint8_t *data, uint64_t *res, bool read)
>> +TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res,
>> +                          bool read)
>>  {
>> -    _status = Running;
>> -    if (fault != NoFault) {
>> -        if (req->isPrefetch())
>> -            fault = NoFault;
>> -        delete data;
>> -        delete req;
>> -
>> -        translationFault(fault);
>> -        return;
>> -    }
>>      PacketPtr pkt;
>>      buildPacket(pkt, req, read);
>>      pkt->dataDynamic<uint8_t>(data);
>> @@ -311,25 +301,9 @@
>>  }
>>
>>  void
>> -TimingSimpleCPU::sendSplitData(Fault fault1, Fault fault2,
>> -        RequestPtr req1, RequestPtr req2, RequestPtr req,
>> -        uint8_t *data, bool read)
>> +TimingSimpleCPU::sendSplitData(RequestPtr req1, RequestPtr req2,
>> +                               RequestPtr req, uint8_t *data, bool  
>> read)
>>  {
>> -    _status = Running;
>> -    if (fault1 != NoFault || fault2 != NoFault) {
>> -        if (req1->isPrefetch())
>> -            fault1 = NoFault;
>> -        if (req2->isPrefetch())
>> -            fault2 = NoFault;
>> -        delete data;
>> -        delete req1;
>> -        delete req2;
>> -        if (fault1 != NoFault)
>> -            translationFault(fault1);
>> -        else if (fault2 != NoFault)
>> -            translationFault(fault2);
>> -        return;
>> -    }
>>      PacketPtr pkt1, pkt2;
>>      buildSplitPacket(pkt1, pkt2, req1, req2, req, data, read);
>>      if (req->getFlags().isSet(Request::NO_ACCESS)) {
>> @@ -450,6 +424,7 @@
>>      const Addr pc = thread->readPC();
>>      unsigned block_size = dcachePort.peerBlockSize();
>>      int data_size = sizeof(T);
>> +    BaseTLB::Mode mode = BaseTLB::Read;
>>
>>      RequestPtr req  = new Request(asid, addr, data_size,
>>                                    flags, pc, _cpuId, tid);
>> @@ -457,24 +432,27 @@
>>      Addr split_addr = roundDown(addr + data_size - 1, block_size);
>>      assert(split_addr <= addr || split_addr - addr < block_size);
>>
>> -
>>      _status = DTBWaitResponse;
>>      if (split_addr > addr) {
>>          RequestPtr req1, req2;
>>          assert(!req->isLLSC() && !req->isSwap());
>>          req->splitOnVaddr(split_addr, req1, req2);
>>
>> -        typedef SplitDataTranslation::WholeTranslationState WholeState;
>> -        WholeState *state = new WholeState(req1, req2, req,
>> -                                           (uint8_t *)(new T),  
>> BaseTLB::Read);
>> -        thread->dtb->translateTiming(req1, tc,
>> -                new SplitDataTranslation(this, 0, state),  
>> BaseTLB::Read);
>> -        thread->dtb->translateTiming(req2, tc,
>> -                new SplitDataTranslation(this, 1, state),  
>> BaseTLB::Read);
>> +        WholeTranslationState *state =
>> +            new WholeTranslationState(req, req1, req2, (uint8_t *)(new  
>> T),
>> +                                      NULL, mode);
>> +        DataTranslationCpu *trans1 =
>> +            new DataTranslationCpu(this, state, 0);
>> +        DataTranslationCpu *trans2 =
>> +            new DataTranslationCpu(this, state, 1);
>> +
>> +        thread->dtb->translateTiming(req1, tc, trans1, mode);
>> +        thread->dtb->translateTiming(req2, tc, trans2, mode);
>>      } else {
>> -        DataTranslation *translation =
>> -            new DataTranslation(this, (uint8_t *)(new T), NULL,  
>> BaseTLB::Read);
>> -        thread->dtb->translateTiming(req, tc, translation,  
>> BaseTLB::Read);
>> +        WholeTranslationState *state =
>> +            new WholeTranslationState(req, (uint8_t *)(new T), NULL,  
>> mode);
>> +        DataTranslationCpu *translation = new DataTranslationCpu(this,  
>> state);
>> +        thread->dtb->translateTiming(req, tc, translation, mode);
>>      }
>>
>>      if (traceData) {
>> @@ -568,6 +546,7 @@
>>      const Addr pc = thread->readPC();
>>      unsigned block_size = dcachePort.peerBlockSize();
>>      int data_size = sizeof(T);
>> +    BaseTLB::Mode mode = BaseTLB::Write;
>>
>>      RequestPtr req = new Request(asid, addr, data_size,
>>                                   flags, pc, _cpuId, tid);
>> @@ -583,17 +562,21 @@
>>          assert(!req->isLLSC() && !req->isSwap());
>>          req->splitOnVaddr(split_addr, req1, req2);
>>
>> -        typedef SplitDataTranslation::WholeTranslationState WholeState;
>> -        WholeState *state = new WholeState(req1, req2, req,
>> -                (uint8_t *)dataP, BaseTLB::Write);
>> -        thread->dtb->translateTiming(req1, tc,
>> -                new SplitDataTranslation(this, 0, state),  
>> BaseTLB::Write);
>> -        thread->dtb->translateTiming(req2, tc,
>> -                new SplitDataTranslation(this, 1, state),  
>> BaseTLB::Write);
>> +        WholeTranslationState *state =
>> +            new WholeTranslationState(req, req1, req2, (uint8_t  
>> *)dataP,
>> +                                      res, mode);
>> +        DataTranslationCpu *trans1 =
>> +            new DataTranslationCpu(this, state, 0);
>> +        DataTranslationCpu *trans2 =
>> +            new DataTranslationCpu(this, state, 1);
>> +
>> +        thread->dtb->translateTiming(req1, tc, trans1, mode);
>> +        thread->dtb->translateTiming(req2, tc, trans2, mode);
>>      } else {
>> -        DataTranslation *translation =
>> -            new DataTranslation(this, (uint8_t *)dataP, res,  
>> BaseTLB::Write);
>> -        thread->dtb->translateTiming(req, tc, translation,  
>> BaseTLB::Write);
>> +        WholeTranslationState *state =
>> +            new WholeTranslationState(req, (uint8_t *)dataP, res,  
>> mode);
>> +        DataTranslationCpu *translation = new DataTranslationCpu(this,  
>> state);
>> +        thread->dtb->translateTiming(req, tc, translation, mode);
>>      }
>>
>>      if (traceData) {
>> @@ -668,6 +651,32 @@
>>
>>
>>  void
>> +TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
>> +{
>> +    _status = Running;
>> +
>> +    if (state->getFault() != NoFault) {
>> +        if (state->isPrefetch()) {
>> +            state->setNoFault();
>> +        }
>> +        delete state->data;
>> +        state->deleteReqs();
>> +        translationFault(state->getFault());
>> +    } else {
>> +        if (!state->isSplit) {
>> +            sendData(state->mainReq, state->data, state->res,
>> +                     state->mode == BaseTLB::Read);
>> +        } else {
>> +            sendSplitData(state->sreqLow, state->sreqHigh,  
>> state->mainReq,
>> +                          state->data, state->mode == BaseTLB::Read);
>> +        }
>> +    }
>> +
>> +    delete state;
>> +}
>> +
>> +
>> +void
>>  TimingSimpleCPU::fetch()
>>  {
>>      DPRINTF(SimpleCPU, "Fetch\n");
>> diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
>> --- a/src/cpu/simple/timing.hh
>> +++ b/src/cpu/simple/timing.hh
>> @@ -32,6 +32,7 @@
>>  #define __CPU_SIMPLE_TIMING_HH__
>>
>>  #include "cpu/simple/base.hh"
>> +#include "cpu/translation.hh"
>>
>>  #include "params/TimingSimpleCPU.hh"
>>
>> @@ -115,95 +116,9 @@
>>      };
>>      FetchTranslation fetchTranslation;
>>
>> -    class DataTranslation : public BaseTLB::Translation
>> -    {
>> -      protected:
>> -        TimingSimpleCPU *cpu;
>> -        uint8_t *data;
>> -        uint64_t *res;
>> -        BaseTLB::Mode mode;
>> -
>> -      public:
>> -        DataTranslation(TimingSimpleCPU *_cpu,
>> -                uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode)
>> -            : cpu(_cpu), data(_data), res(_res), mode(_mode)
>> -        {
>> -            assert(mode == BaseTLB::Read || mode == BaseTLB::Write);
>> -        }
>> -
>> -        void
>> -        finish(Fault fault, RequestPtr req, ThreadContext *tc,
>> -               BaseTLB::Mode mode)
>> -        {
>> -            assert(mode == this->mode);
>> -            cpu->sendData(fault, req, data, res, mode ==  
>> BaseTLB::Read);
>> -            delete this;
>> -        }
>> -    };
>> -
>> -    class SplitDataTranslation : public BaseTLB::Translation
>> -    {
>> -      public:
>> -        struct WholeTranslationState
>> -        {
>> -          public:
>> -            int outstanding;
>> -            RequestPtr requests[2];
>> -            RequestPtr mainReq;
>> -            Fault faults[2];
>> -            uint8_t *data;
>> -            BaseTLB::Mode mode;
>> -
>> -            WholeTranslationState(RequestPtr req1, RequestPtr req2,
>> -                    RequestPtr main, uint8_t *data, BaseTLB::Mode mode)
>> -            {
>> -                assert(mode == BaseTLB::Read || mode ==  
>> BaseTLB::Write);
>> -
>> -                outstanding = 2;
>> -                requests[0] = req1;
>> -                requests[1] = req2;
>> -                mainReq = main;
>> -                faults[0] = faults[1] = NoFault;
>> -                this->data = data;
>> -                this->mode = mode;
>> -            }
>> -        };
>> -
>> -        TimingSimpleCPU *cpu;
>> -        int index;
>> -        WholeTranslationState *state;
>> -
>> -        SplitDataTranslation(TimingSimpleCPU *_cpu, int _index,
>> -                WholeTranslationState *_state)
>> -            : cpu(_cpu), index(_index), state(_state)
>> -        {}
>> -
>> -        void
>> -        finish(Fault fault, RequestPtr req, ThreadContext *tc,
>> -               BaseTLB::Mode mode)
>> -        {
>> -            assert(state);
>> -            assert(state->outstanding);
>> -            state->faults[index] = fault;
>> -            if (--state->outstanding == 0) {
>> -                cpu->sendSplitData(state->faults[0],
>> -                                   state->faults[1],
>> -                                   state->requests[0],
>> -                                   state->requests[1],
>> -                                   state->mainReq,
>> -                                   state->data,
>> -                                   state->mode == BaseTLB::Read);
>> -                delete state;
>> -            }
>> -            delete this;
>> -        }
>> -    };
>> -
>> -    void sendData(Fault fault, RequestPtr req,
>> -            uint8_t *data, uint64_t *res, bool read);
>> -    void sendSplitData(Fault fault1, Fault fault2,
>> -            RequestPtr req1, RequestPtr req2, RequestPtr req,
>> -            uint8_t *data, bool read);
>> +    void sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool  
>> read);
>> +    void sendSplitData(RequestPtr req1, RequestPtr req2, RequestPtr  
>> req,
>> +                       uint8_t *data, bool read);
>>
>>      void translationFault(Fault fault);
>>
>> @@ -351,6 +266,12 @@
>>       */
>>      void printAddr(Addr a);
>>
>> +    /**
>> +     * Finish a DTB translation.
>> +     * @param state The DTB translation state.
>> +     */
>> +    virtual void finishTranslation(WholeTranslationState *state);
>> +
>>    private:
>>
>>      typedef EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>  
>> FetchEvent;
>> diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh
>> --- a/src/cpu/translation.hh
>> +++ b/src/cpu/translation.hh
>> @@ -34,6 +34,7 @@
>>  #define __CPU_TRANSLATION_HH__
>>
>>  #include "sim/tlb.hh"
>> +#include "cpu/base.hh"
>>
>>  // Forward declaration
>>  template <class Impl>
>> @@ -50,13 +51,15 @@
>>      RequestPtr mainReq;
>>      RequestPtr sreqLow;
>>      RequestPtr sreqHigh;
>> +    uint8_t *data;
>>      uint64_t *res;
>>      BaseTLB::Mode mode;
>>
>>      /** Single translation state. */
>> -    WholeTranslationState(RequestPtr _req, uint64_t *_res,  
>> BaseTLB::Mode _mode)
>> +    WholeTranslationState(RequestPtr _req, uint8_t *_data, uint64_t  
>> *_res,
>> +                          BaseTLB::Mode _mode)
>>          : outstanding(1), isSplit(false), mainReq(_req), sreqLow(NULL),
>> -          sreqHigh(NULL), res(_res), mode(_mode)
>> +          sreqHigh(NULL), data(_data), res(_res), mode(_mode)
>>      {
>>          faults[0] = faults[1] = NoFault;
>>          assert(mode == BaseTLB::Read || mode == BaseTLB::Write);
>> @@ -64,10 +67,10 @@
>>
>>      /** Split translation state. */
>>      WholeTranslationState(RequestPtr _req, RequestPtr _sreqLow,
>> -                          RequestPtr _sreqHigh, uint64_t *_res,
>> +                          RequestPtr _sreqHigh, uint8_t *_data,  
>> uint64_t *_res,
>>                            BaseTLB::Mode _mode)
>>          : outstanding(2), isSplit(true), mainReq(_req),  
>> sreqLow(_sreqLow),
>> -          sreqHigh(_sreqHigh), res(_res), mode(_mode)
>> +          sreqHigh(_sreqHigh), data(_data), res(_res), mode(_mode)
>>      {
>>          faults[0] = faults[1] = NoFault;
>>          assert(mode == BaseTLB::Read || mode == BaseTLB::Write);
>> @@ -104,12 +107,24 @@
>>              return NoFault;
>>      }
>>
>> +    void
>> +    setNoFault()
>> +    {
>> +        faults[0] = faults[1] = NoFault;
>> +    }
>> +
>>      bool
>>      isUncacheable() const
>>      {
>>          return mainReq->isUncacheable();
>>      }
>>
>> +    bool
>> +    isPrefetch() const
>> +    {
>> +        return mainReq->isPrefetch();
>> +    }
>> +
>>      Addr
>>      getPaddr() const
>>      {
>> @@ -134,7 +149,7 @@
>>  };
>>
>>  template <class Impl>
>> -class DataTranslation : public BaseTLB::Translation
>> +class DataTranslationInst : public BaseTLB::Translation
>>  {
>>    protected:
>>      typedef BaseDynInst<Impl> DynInst;
>> @@ -145,13 +160,13 @@
>>      int index;
>>
>>    public:
>> -    DataTranslation(DynInstPtr _inst, WholeTranslationState *_state)
>> +    DataTranslationInst(DynInstPtr _inst, WholeTranslationState  
>> *_state)
>>          : inst(_inst), state(_state), index(0)
>>      {
>>      }
>>
>> -    DataTranslation(DynInstPtr _inst, WholeTranslationState *_state,
>> -                    int _index)
>> +    DataTranslationInst(DynInstPtr _inst, WholeTranslationState  
>> *_state,
>> +                        int _index)
>>          : inst(_inst), state(_state), index(_index)
>>      {
>>      }
>> @@ -163,10 +178,44 @@
>>          assert(state);
>>          assert(mode == state->mode);
>>          if (state->finish(fault, index)) {
>> +            assert(inst);
>>              inst->finishTranslation(state);
>>          }
>>          delete this;
>>      }
>>  };
>>
>> +class DataTranslationCpu : public BaseTLB::Translation
>> +{
>> +  protected:
>> +    BaseCPU *cpu;
>> +    WholeTranslationState *state;
>> +    int index;
>> +
>> +  public:
>> +    DataTranslationCpu(BaseCPU *_cpu, WholeTranslationState *_state)
>> +        : cpu(_cpu), state(_state), index(0)
>> +    {
>> +    }
>> +
>> +    DataTranslationCpu(BaseCPU *_cpu, WholeTranslationState *_state,
>> +                       int _index)
>> +        : cpu(_cpu), state(_state), index(_index)
>> +    {
>> +    }
>> +
>> +    virtual void
>> +    finish(Fault fault, RequestPtr req, ThreadContext *tc,
>> +           BaseTLB::Mode mode)
>> +    {
>> +        assert(state);
>> +        assert(mode == state->mode);
>> +        if (state->finish(fault, index)) {
>> +            assert(cpu);
>> +            cpu->finishTranslation(state);
>> +        }
>> +        delete this;
>> +    }
>> +};
>> +
>>  #endif // __CPU_TRANSLATION_HH__
>>
>>
>
> _______________________________________________
> m5-dev mailing list
> [email protected]
> http://m5sim.org/mailman/listinfo/m5-dev
>


-- 
The University of Edinburgh is a charitable body, registered in
Scotland, with registration number SC005336.

_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to