This looks good, but just so I understand what changes were made, you're
making the squash stuff less picky and setting the target for
unconditional branches in made in decode consider delay slots, right?

Gabe

Korey Sewell wrote:
> changeset f0841ee466a5 in /z/repo/m5
> details: http://repo.m5sim.org/m5?cmd=changeset;node=f0841ee466a5
> description:
>       o3-delay-slot-bpred: fix decode stage handling of uncdtl. branches.\n 
> decode stage was not setting the predicted PC correctly or passing that 
> information back to fetch correctly
>
> diffstat:
>
> 7 files changed, 86 insertions(+), 25 deletions(-)
> src/arch/mips/isa/decoder.isa |    2 -
> src/cpu/SConscript            |    2 +
> src/cpu/o3/bpred_unit.hh      |    5 +++
> src/cpu/o3/bpred_unit_impl.hh |   55 +++++++++++++++++++++++++++++++++++------
> src/cpu/o3/decode_impl.hh     |   29 +++++++++++++++------
> src/cpu/o3/fetch_impl.hh      |   16 ++++++-----
> src/cpu/o3/iew_impl.hh        |    2 -
>
> diffs (277 lines):
>
> diff -r 4d27997b548c -r f0841ee466a5 src/arch/mips/isa/decoder.isa
> --- a/src/arch/mips/isa/decoder.isa   Sat Apr 18 10:42:28 2009 -0400
> +++ b/src/arch/mips/isa/decoder.isa   Sat Apr 18 10:42:29 2009 -0400
> @@ -135,7 +135,7 @@
>                      0x2: movz({{ Rd = (Rt == 0) ? Rs : Rd; }});
>                      0x3: movn({{ Rd = (Rt != 0) ? Rs : Rd; }});
>  #if FULL_SYSTEM
> -                  0x4: syscall({{
> +                    0x4: syscall({{
>                                     fault = new SystemCallFault();
>                                   }});
>  #else
> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/SConscript
> --- a/src/cpu/SConscript      Sat Apr 18 10:42:28 2009 -0400
> +++ b/src/cpu/SConscript      Sat Apr 18 10:42:29 2009 -0400
> @@ -177,3 +177,5 @@
>  
>  CompoundFlag('Exec', [ 'ExecEnable', 'ExecTicks', 'ExecOpClass', 
> 'ExecThread',
>      'ExecEffAddr', 'ExecResult', 'ExecSymbol', 'ExecMicro' ])
> +CompoundFlag('ExecNoTicks', [ 'ExecEnable', 'ExecOpClass', 'ExecThread',
> +    'ExecEffAddr', 'ExecResult', 'ExecMicro' ])
> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/bpred_unit.hh
> --- a/src/cpu/o3/bpred_unit.hh        Sat Apr 18 10:42:28 2009 -0400
> +++ b/src/cpu/o3/bpred_unit.hh        Sat Apr 18 10:42:29 2009 -0400
> @@ -188,6 +188,10 @@
>                wasCall(0), bpHistory(bp_history)
>          { }
>  
> +        bool operator==(const PredictorHistory &entry) const {
> +            return this->seqNum == entry.seqNum;
> +        }
> +
>          /** The sequence number for the predictor history entry. */
>          InstSeqNum seqNum;
>  
> @@ -220,6 +224,7 @@
>      };
>  
>      typedef std::list<PredictorHistory> History;
> +    typedef typename History::iterator HistoryIt;
>  
>      /**
>       * The per-thread predictor history. This is used to update the predictor
> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/bpred_unit_impl.hh
> --- a/src/cpu/o3/bpred_unit_impl.hh   Sat Apr 18 10:42:28 2009 -0400
> +++ b/src/cpu/o3/bpred_unit_impl.hh   Sat Apr 18 10:42:29 2009 -0400
> @@ -36,6 +36,8 @@
>  
>  #include "params/DerivO3CPU.hh"
>  
> +#include <algorithm>
> +
>  template<class Impl>
>  BPredUnit<Impl>::BPredUnit(DerivO3CPUParams *params)
>      : _name(params->name + ".BPredUnit"),
> @@ -173,6 +175,10 @@
>                  tid, pred_taken, inst->readPC());
>      }
>  
> +    DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i] Creating prediction 
> history "
> +                "for PC %#x\n",
> +            tid, inst->seqNum, inst->readPC());
> +
>      PredictorHistory predict_record(inst->seqNum, PC, pred_taken,
>                                      bp_history, tid);
>  
> @@ -240,7 +246,8 @@
>  
>      predHist[tid].push_front(predict_record);
>  
> -    DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, 
> predHist[tid].size());
> +    DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i]: History entry added."
> +            "predHist.size(): %i\n", tid, inst->seqNum, 
> predHist[tid].size());
>  
>      return pred_taken;
>  }
> @@ -249,7 +256,7 @@
>  void
>  BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
>  {
> -    DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until "
> +    DPRINTF(Fetch, "BranchPred: [tid:%i]: Committing branches until "
>              "[sn:%lli].\n", tid, done_sn);
>  
>      while (!predHist[tid].empty() &&
> @@ -290,7 +297,12 @@
>          // This call should delete the bpHistory.
>          BPSquash(pred_hist.front().bpHistory);
>  
> +        DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] "
> +                "PC %#x.\n", tid, pred_hist.front().seqNum, 
> pred_hist.front().PC);
> +
>          pred_hist.pop_front();
> +
> +        DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, 
> predHist[tid].size());
>      }
>  
>  }
> @@ -305,6 +317,13 @@
>      // Now that we know that a branch was mispredicted, we need to undo
>      // all the branches that have been seen up until this branch and
>      // fix up everything.
> +    // NOTE: This should be call conceivably in 2 scenarios:
> +    // (1) After an branch is executed, it updates its status in the ROB
> +    //     The commit stage then checks the ROB update and sends a signal to
> +    //     the fetch stage to squash history after the mispredict
> +    // (2) In the decode stage, you can find out early if a unconditional
> +    //     PC-relative, branch was predicted incorrectly. If so, a signal
> +    //     to the fetch stage is sent to squash history after the mispredict
>  
>      History &pred_hist = predHist[tid];
>  
> @@ -314,22 +333,42 @@
>              "setting target to %#x.\n",
>              tid, squashed_sn, corr_target);
>  
> +    // Squash All Branches AFTER this mispredicted branch
>      squash(squashed_sn, tid);
>  
>      // If there's a squash due to a syscall, there may not be an entry
>      // corresponding to the squash.  In that case, don't bother trying to
>      // fix up the entry.
>      if (!pred_hist.empty()) {
> -        assert(pred_hist.front().seqNum == squashed_sn);
> -        if (pred_hist.front().usedRAS) {
> +
> +        HistoryIt hist_it = pred_hist.begin();
> +        //HistoryIt hist_it = find(pred_hist.begin(), pred_hist.end(),
> +        //                       squashed_sn);
> +
> +        //assert(hist_it != pred_hist.end());
> +        if (pred_hist.front().seqNum != squashed_sn) {
> +            DPRINTF(Fetch, "Front sn %i != Squash sn %i\n",
> +                    pred_hist.front().seqNum, squashed_sn);
> +
> +            assert(pred_hist.front().seqNum == squashed_sn);
> +        }
> +
> +
> +        if ((*hist_it).usedRAS) {
>              ++RASIncorrect;
>          }
>  
> -        BPUpdate(pred_hist.front().PC, actually_taken,
> +        BPUpdate((*hist_it).PC, actually_taken,
>                   pred_hist.front().bpHistory);
>  
> -        BTB.update(pred_hist.front().PC, corr_target, tid);
> -        pred_hist.pop_front();
> +        BTB.update((*hist_it).PC, corr_target, tid);
> +
> +        DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] "
> +                "PC %#x.\n", tid, (*hist_it).seqNum, (*hist_it).PC);
> +
> +        pred_hist.erase(hist_it);
> +
> +        DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, 
> predHist[tid].size());
>      }
>  }
>  
> @@ -386,7 +425,7 @@
>  void
>  BPredUnit<Impl>::dump()
>  {
> -    typename History::iterator pred_hist_it;
> +    HistoryIt pred_hist_it;
>  
>      for (int i = 0; i < Impl::MaxThreads; ++i) {
>          if (!predHist[i].empty()) {
> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/decode_impl.hh
> --- a/src/cpu/o3/decode_impl.hh       Sat Apr 18 10:42:28 2009 -0400
> +++ b/src/cpu/o3/decode_impl.hh       Sat Apr 18 10:42:29 2009 -0400
> @@ -262,28 +262,30 @@
>  void
>  DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
>  {
> -    DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction "
> -            "detected at decode.\n", tid);
> +    DPRINTF(Decode, "[tid:%i]: [sn:%i] Squashing due to incorrect branch 
> prediction "
> +            "detected at decode.\n", tid, inst->seqNum);
>  
>      // Send back mispredict information.
>      toFetch->decodeInfo[tid].branchMispredict = true;
>      toFetch->decodeInfo[tid].predIncorrect = true;
> +    toFetch->decodeInfo[tid].squash = true;
>      toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
> -    toFetch->decodeInfo[tid].squash = true;
> -    toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
> -    ///FIXME There needs to be a way to set the nextPC and nextNPC
> -    ///explicitly for ISAs with delay slots.
> -    toFetch->decodeInfo[tid].nextNPC =
> -        inst->branchTarget() + sizeof(TheISA::MachInst);
>      toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC();
> +
>  #if ISA_HAS_DELAY_SLOT
> +    toFetch->decodeInfo[tid].nextPC = inst->readPC() + 
> sizeof(TheISA::MachInst);
> +    toFetch->decodeInfo[tid].nextNPC = inst->branchTarget();
>      toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
>          (inst->readNextPC() + sizeof(TheISA::MachInst));
>  #else
> +    toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
> +    toFetch->decodeInfo[tid].nextNPC =
> +        inst->branchTarget() + sizeof(TheISA::MachInst);
>      toFetch->decodeInfo[tid].branchTaken =
>          inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
>  #endif
>  
> +
>      InstSeqNum squash_seq_num = inst->seqNum;
>  
>      // Might have to tell fetch to unblock.
> @@ -738,8 +740,19 @@
>                  // a check at the end
>                  squash(inst, inst->threadNumber);
>                  Addr target = inst->branchTarget();
> +
> +#if ISA_HAS_DELAY_SLOT
> +                DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %#x  
> PredNextPC: %#x\n",
> +                        inst->seqNum, inst->readPC() + 
> sizeof(TheISA::MachInst), target);
> +
> +                //The micro pc after an instruction level branch should be 0
> +                inst->setPredTarg(inst->readPC() + sizeof(TheISA::MachInst), 
> target, 0);
> +#else
> +                DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %#x  
> PredNextPC: %#x\n",
> +                        inst->seqNum, target, target + 
> sizeof(TheISA::MachInst));
>                  //The micro pc after an instruction level branch should be 0
>                  inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 
> 0);
> +#endif
>                  break;
>              }
>          }
> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/fetch_impl.hh
> --- a/src/cpu/o3/fetch_impl.hh        Sat Apr 18 10:42:28 2009 -0400
> +++ b/src/cpu/o3/fetch_impl.hh        Sat Apr 18 10:42:29 2009 -0400
> @@ -524,12 +524,13 @@
>      Addr pred_PC = next_PC;
>      predict_taken = branchPred.predict(inst, pred_PC, tid);
>  
> -/*    if (predict_taken) {
> -        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n",
> -                tid, pred_PC);
> +    if (predict_taken) {
> +        DPRINTF(Fetch, "[tid:%i]: [sn:%i]:  Branch predicted to be taken to 
> %#x.\n",
> +                tid, inst->seqNum, pred_PC);
>      } else {
> -        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
> -    }*/
> +        DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not 
> taken.\n",
> +                tid, inst->seqNum);
> +    }
>  
>  #if ISA_HAS_DELAY_SLOT
>      next_PC = next_NPC;
> @@ -544,8 +545,9 @@
>          next_PC += instSize;
>      next_NPC = next_PC + instSize;
>  #endif
> -/*    DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then 
> %#x.\n",
> -            tid, next_PC, next_NPC);*/
> +
> +    DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %#x and then 
> %#x.\n",
> +            tid, inst->seqNum, next_PC, next_NPC);
>      inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
>      inst->setPredTaken(predict_taken);
>  
> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/iew_impl.hh
> --- a/src/cpu/o3/iew_impl.hh  Sat Apr 18 10:42:28 2009 -0400
> +++ b/src/cpu/o3/iew_impl.hh  Sat Apr 18 10:42:29 2009 -0400
> @@ -1282,7 +1282,7 @@
>                  fetchRedirect[tid] = true;
>  
>                  DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
> -                DPRINTF(IEW, "Predicted target was %#x, %#x.\n",
> +                DPRINTF(IEW, "Predicted target was PC:%#x, NPC:%#x.\n",
>                          inst->readPredPC(), inst->readPredNPC());
>                  DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x,"
>                          " NPC: %#x.\n", inst->readNextPC(),
> _______________________________________________
> m5-dev mailing list
> [email protected]
> http://m5sim.org/mailman/listinfo/m5-dev
>   

_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to