Two things:
(1) arrrghhh....didnt do a good job all the way with this patch... I
left some comments and things I didnt want to include. So maybe I'll
just do another "clean up comments/DPRINTS" changeset ...

(2) Gabe, I think you understand what's going on here OK. O3 already
checked for an unconditional branch in decode and set the target.
However, it does this check by seeing if the instruction has a
unconditional control flag set. If it does, then it will go ahead and
update the branch prediction early (in decode) rather than wait until
Execute. It didnt affect SPARC I believe since that flag never gets
set so SPARC misses out on that optimization (which more than likely
will be offset by a BTB entry though as the program warms up)... The
squash in decode is only called when a branch mispredict is detected
in decode so yea you're right in that what we had to do is make the
special case for delay slots just like we did in other areas of the O3
code.

On Sat, Apr 18, 2009 at 6:22 PM, Gabe Black <[email protected]> wrote:
> This looks good, but just so I understand what changes were made, you're
> making the squash stuff less picky and setting the target for
> unconditional branches in made in decode consider delay slots, right?
>
> Gabe
>
> Korey Sewell wrote:
>> changeset f0841ee466a5 in /z/repo/m5
>> details: http://repo.m5sim.org/m5?cmd=changeset;node=f0841ee466a5
>> description:
>>       o3-delay-slot-bpred: fix decode stage handling of uncdtl. branches.\n 
>> decode stage was not setting the predicted PC correctly or passing that 
>> information back to fetch correctly
>>
>> diffstat:
>>
>> 7 files changed, 86 insertions(+), 25 deletions(-)
>> src/arch/mips/isa/decoder.isa |    2 -
>> src/cpu/SConscript            |    2 +
>> src/cpu/o3/bpred_unit.hh      |    5 +++
>> src/cpu/o3/bpred_unit_impl.hh |   55 
>> +++++++++++++++++++++++++++++++++++------
>> src/cpu/o3/decode_impl.hh     |   29 +++++++++++++++------
>> src/cpu/o3/fetch_impl.hh      |   16 ++++++-----
>> src/cpu/o3/iew_impl.hh        |    2 -
>>
>> diffs (277 lines):
>>
>> diff -r 4d27997b548c -r f0841ee466a5 src/arch/mips/isa/decoder.isa
>> --- a/src/arch/mips/isa/decoder.isa   Sat Apr 18 10:42:28 2009 -0400
>> +++ b/src/arch/mips/isa/decoder.isa   Sat Apr 18 10:42:29 2009 -0400
>> @@ -135,7 +135,7 @@
>>                      0x2: movz({{ Rd = (Rt == 0) ? Rs : Rd; }});
>>                      0x3: movn({{ Rd = (Rt != 0) ? Rs : Rd; }});
>>  #if FULL_SYSTEM
>> -                  0x4: syscall({{
>> +                    0x4: syscall({{
>>                                     fault = new SystemCallFault();
>>                                   }});
>>  #else
>> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/SConscript
>> --- a/src/cpu/SConscript      Sat Apr 18 10:42:28 2009 -0400
>> +++ b/src/cpu/SConscript      Sat Apr 18 10:42:29 2009 -0400
>> @@ -177,3 +177,5 @@
>>
>>  CompoundFlag('Exec', [ 'ExecEnable', 'ExecTicks', 'ExecOpClass', 
>> 'ExecThread',
>>      'ExecEffAddr', 'ExecResult', 'ExecSymbol', 'ExecMicro' ])
>> +CompoundFlag('ExecNoTicks', [ 'ExecEnable', 'ExecOpClass', 'ExecThread',
>> +    'ExecEffAddr', 'ExecResult', 'ExecMicro' ])
>> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/bpred_unit.hh
>> --- a/src/cpu/o3/bpred_unit.hh        Sat Apr 18 10:42:28 2009 -0400
>> +++ b/src/cpu/o3/bpred_unit.hh        Sat Apr 18 10:42:29 2009 -0400
>> @@ -188,6 +188,10 @@
>>                wasCall(0), bpHistory(bp_history)
>>          { }
>>
>> +        bool operator==(const PredictorHistory &entry) const {
>> +            return this->seqNum == entry.seqNum;
>> +        }
>> +
>>          /** The sequence number for the predictor history entry. */
>>          InstSeqNum seqNum;
>>
>> @@ -220,6 +224,7 @@
>>      };
>>
>>      typedef std::list<PredictorHistory> History;
>> +    typedef typename History::iterator HistoryIt;
>>
>>      /**
>>       * The per-thread predictor history. This is used to update the 
>> predictor
>> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/bpred_unit_impl.hh
>> --- a/src/cpu/o3/bpred_unit_impl.hh   Sat Apr 18 10:42:28 2009 -0400
>> +++ b/src/cpu/o3/bpred_unit_impl.hh   Sat Apr 18 10:42:29 2009 -0400
>> @@ -36,6 +36,8 @@
>>
>>  #include "params/DerivO3CPU.hh"
>>
>> +#include <algorithm>
>> +
>>  template<class Impl>
>>  BPredUnit<Impl>::BPredUnit(DerivO3CPUParams *params)
>>      : _name(params->name + ".BPredUnit"),
>> @@ -173,6 +175,10 @@
>>                  tid, pred_taken, inst->readPC());
>>      }
>>
>> +    DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i] Creating prediction 
>> history "
>> +                "for PC %#x\n",
>> +            tid, inst->seqNum, inst->readPC());
>> +
>>      PredictorHistory predict_record(inst->seqNum, PC, pred_taken,
>>                                      bp_history, tid);
>>
>> @@ -240,7 +246,8 @@
>>
>>      predHist[tid].push_front(predict_record);
>>
>> -    DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, 
>> predHist[tid].size());
>> +    DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i]: History entry added."
>> +            "predHist.size(): %i\n", tid, inst->seqNum, 
>> predHist[tid].size());
>>
>>      return pred_taken;
>>  }
>> @@ -249,7 +256,7 @@
>>  void
>>  BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
>>  {
>> -    DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until "
>> +    DPRINTF(Fetch, "BranchPred: [tid:%i]: Committing branches until "
>>              "[sn:%lli].\n", tid, done_sn);
>>
>>      while (!predHist[tid].empty() &&
>> @@ -290,7 +297,12 @@
>>          // This call should delete the bpHistory.
>>          BPSquash(pred_hist.front().bpHistory);
>>
>> +        DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] "
>> +                "PC %#x.\n", tid, pred_hist.front().seqNum, 
>> pred_hist.front().PC);
>> +
>>          pred_hist.pop_front();
>> +
>> +        DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, 
>> predHist[tid].size());
>>      }
>>
>>  }
>> @@ -305,6 +317,13 @@
>>      // Now that we know that a branch was mispredicted, we need to undo
>>      // all the branches that have been seen up until this branch and
>>      // fix up everything.
>> +    // NOTE: This should be call conceivably in 2 scenarios:
>> +    // (1) After an branch is executed, it updates its status in the ROB
>> +    //     The commit stage then checks the ROB update and sends a signal to
>> +    //     the fetch stage to squash history after the mispredict
>> +    // (2) In the decode stage, you can find out early if a unconditional
>> +    //     PC-relative, branch was predicted incorrectly. If so, a signal
>> +    //     to the fetch stage is sent to squash history after the mispredict
>>
>>      History &pred_hist = predHist[tid];
>>
>> @@ -314,22 +333,42 @@
>>              "setting target to %#x.\n",
>>              tid, squashed_sn, corr_target);
>>
>> +    // Squash All Branches AFTER this mispredicted branch
>>      squash(squashed_sn, tid);
>>
>>      // If there's a squash due to a syscall, there may not be an entry
>>      // corresponding to the squash.  In that case, don't bother trying to
>>      // fix up the entry.
>>      if (!pred_hist.empty()) {
>> -        assert(pred_hist.front().seqNum == squashed_sn);
>> -        if (pred_hist.front().usedRAS) {
>> +
>> +        HistoryIt hist_it = pred_hist.begin();
>> +        //HistoryIt hist_it = find(pred_hist.begin(), pred_hist.end(),
>> +        //                       squashed_sn);
>> +
>> +        //assert(hist_it != pred_hist.end());
>> +        if (pred_hist.front().seqNum != squashed_sn) {
>> +            DPRINTF(Fetch, "Front sn %i != Squash sn %i\n",
>> +                    pred_hist.front().seqNum, squashed_sn);
>> +
>> +            assert(pred_hist.front().seqNum == squashed_sn);
>> +        }
>> +
>> +
>> +        if ((*hist_it).usedRAS) {
>>              ++RASIncorrect;
>>          }
>>
>> -        BPUpdate(pred_hist.front().PC, actually_taken,
>> +        BPUpdate((*hist_it).PC, actually_taken,
>>                   pred_hist.front().bpHistory);
>>
>> -        BTB.update(pred_hist.front().PC, corr_target, tid);
>> -        pred_hist.pop_front();
>> +        BTB.update((*hist_it).PC, corr_target, tid);
>> +
>> +        DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] "
>> +                "PC %#x.\n", tid, (*hist_it).seqNum, (*hist_it).PC);
>> +
>> +        pred_hist.erase(hist_it);
>> +
>> +        DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, 
>> predHist[tid].size());
>>      }
>>  }
>>
>> @@ -386,7 +425,7 @@
>>  void
>>  BPredUnit<Impl>::dump()
>>  {
>> -    typename History::iterator pred_hist_it;
>> +    HistoryIt pred_hist_it;
>>
>>      for (int i = 0; i < Impl::MaxThreads; ++i) {
>>          if (!predHist[i].empty()) {
>> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/decode_impl.hh
>> --- a/src/cpu/o3/decode_impl.hh       Sat Apr 18 10:42:28 2009 -0400
>> +++ b/src/cpu/o3/decode_impl.hh       Sat Apr 18 10:42:29 2009 -0400
>> @@ -262,28 +262,30 @@
>>  void
>>  DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
>>  {
>> -    DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction 
>> "
>> -            "detected at decode.\n", tid);
>> +    DPRINTF(Decode, "[tid:%i]: [sn:%i] Squashing due to incorrect branch 
>> prediction "
>> +            "detected at decode.\n", tid, inst->seqNum);
>>
>>      // Send back mispredict information.
>>      toFetch->decodeInfo[tid].branchMispredict = true;
>>      toFetch->decodeInfo[tid].predIncorrect = true;
>> +    toFetch->decodeInfo[tid].squash = true;
>>      toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
>> -    toFetch->decodeInfo[tid].squash = true;
>> -    toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
>> -    ///FIXME There needs to be a way to set the nextPC and nextNPC
>> -    ///explicitly for ISAs with delay slots.
>> -    toFetch->decodeInfo[tid].nextNPC =
>> -        inst->branchTarget() + sizeof(TheISA::MachInst);
>>      toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC();
>> +
>>  #if ISA_HAS_DELAY_SLOT
>> +    toFetch->decodeInfo[tid].nextPC = inst->readPC() + 
>> sizeof(TheISA::MachInst);
>> +    toFetch->decodeInfo[tid].nextNPC = inst->branchTarget();
>>      toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
>>          (inst->readNextPC() + sizeof(TheISA::MachInst));
>>  #else
>> +    toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
>> +    toFetch->decodeInfo[tid].nextNPC =
>> +        inst->branchTarget() + sizeof(TheISA::MachInst);
>>      toFetch->decodeInfo[tid].branchTaken =
>>          inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
>>  #endif
>>
>> +
>>      InstSeqNum squash_seq_num = inst->seqNum;
>>
>>      // Might have to tell fetch to unblock.
>> @@ -738,8 +740,19 @@
>>                  // a check at the end
>>                  squash(inst, inst->threadNumber);
>>                  Addr target = inst->branchTarget();
>> +
>> +#if ISA_HAS_DELAY_SLOT
>> +                DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %#x 
>>  PredNextPC: %#x\n",
>> +                        inst->seqNum, inst->readPC() + 
>> sizeof(TheISA::MachInst), target);
>> +
>> +                //The micro pc after an instruction level branch should be 0
>> +                inst->setPredTarg(inst->readPC() + 
>> sizeof(TheISA::MachInst), target, 0);
>> +#else
>> +                DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %#x 
>>  PredNextPC: %#x\n",
>> +                        inst->seqNum, target, target + 
>> sizeof(TheISA::MachInst));
>>                  //The micro pc after an instruction level branch should be 0
>>                  inst->setPredTarg(target, target + 
>> sizeof(TheISA::MachInst), 0);
>> +#endif
>>                  break;
>>              }
>>          }
>> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/fetch_impl.hh
>> --- a/src/cpu/o3/fetch_impl.hh        Sat Apr 18 10:42:28 2009 -0400
>> +++ b/src/cpu/o3/fetch_impl.hh        Sat Apr 18 10:42:29 2009 -0400
>> @@ -524,12 +524,13 @@
>>      Addr pred_PC = next_PC;
>>      predict_taken = branchPred.predict(inst, pred_PC, tid);
>>
>> -/*    if (predict_taken) {
>> -        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n",
>> -                tid, pred_PC);
>> +    if (predict_taken) {
>> +        DPRINTF(Fetch, "[tid:%i]: [sn:%i]:  Branch predicted to be taken to 
>> %#x.\n",
>> +                tid, inst->seqNum, pred_PC);
>>      } else {
>> -        DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", 
>> tid);
>> -    }*/
>> +        DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not 
>> taken.\n",
>> +                tid, inst->seqNum);
>> +    }
>>
>>  #if ISA_HAS_DELAY_SLOT
>>      next_PC = next_NPC;
>> @@ -544,8 +545,9 @@
>>          next_PC += instSize;
>>      next_NPC = next_PC + instSize;
>>  #endif
>> -/*    DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then 
>> %#x.\n",
>> -            tid, next_PC, next_NPC);*/
>> +
>> +    DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %#x and 
>> then %#x.\n",
>> +            tid, inst->seqNum, next_PC, next_NPC);
>>      inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
>>      inst->setPredTaken(predict_taken);
>>
>> diff -r 4d27997b548c -r f0841ee466a5 src/cpu/o3/iew_impl.hh
>> --- a/src/cpu/o3/iew_impl.hh  Sat Apr 18 10:42:28 2009 -0400
>> +++ b/src/cpu/o3/iew_impl.hh  Sat Apr 18 10:42:29 2009 -0400
>> @@ -1282,7 +1282,7 @@
>>                  fetchRedirect[tid] = true;
>>
>>                  DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
>> -                DPRINTF(IEW, "Predicted target was %#x, %#x.\n",
>> +                DPRINTF(IEW, "Predicted target was PC:%#x, NPC:%#x.\n",
>>                          inst->readPredPC(), inst->readPredNPC());
>>                  DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x,"
>>                          " NPC: %#x.\n", inst->readNextPC(),
>> _______________________________________________
>> m5-dev mailing list
>> [email protected]
>> http://m5sim.org/mailman/listinfo/m5-dev
>>
>
> _______________________________________________
> m5-dev mailing list
> [email protected]
> http://m5sim.org/mailman/listinfo/m5-dev
>



-- 
----------
Korey L Sewell
Graduate Student - PhD Candidate
Computer Science & Engineering
University of Michigan
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to