changeset f94c14fd6561 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=f94c14fd6561
description:
        cpu: disallow speculative update of branch predictor tables (o3)

        The Minor and o3 cpu models share the branch prediction
        code. Minor relies on the BPredUnit::squash() function
        to update the branch predictor tables on a branch mispre-
        diction. This is fine because Minor executes in-order, so
        the update is on the correct path. However, this causes the
        branch predictor to be updated on out-of-order branch
        mispredictions when using the o3 model, which should not
        be the case.

        This patch guards against speculative update of the branch
        prediction tables. On a branch misprediction, BPredUnit::squash()
        calls BpredUnit::update(..., squashed = true). The underlying
        branch predictor tests against the value of squashed. If it is
        true, it restores any speculatively updated internal state
        it might have (e.g., global/local branch history), then returns.
        If false, it updates its prediction tables. Previously, exist-
        ing predictors did not test against the "squashed" parameter.

        To accomodate for this change, the Minor model must now call
        BPredUnit::squash() then BPredUnit::update(..., squashed = false)
        on branch mispredictions. Before, calling BpredUnit::squash()
        performed the prediction tables update.

        The effect is a slight MPKI improvement when using the o3
        model. A further patch should perform the same modifications
        for the indirect target predictor and BTB (less critical).

        Signed-off-by: Jason Lowe-Power <[email protected]>

diffstat:

 src/cpu/minor/fetch2.cc    |    8 ++
 src/cpu/pred/2bit_local.cc |    6 +
 src/cpu/pred/2bit_local.hh |    3 -
 src/cpu/pred/bi_mode.cc    |  103 ++++++++++++++----------------
 src/cpu/pred/bi_mode.hh    |    1 -
 src/cpu/pred/bpred_unit.cc |   25 ++++--
 src/cpu/pred/bpred_unit.hh |   13 +---
 src/cpu/pred/tournament.cc |  151 +++++++++++++++++++-------------------------
 src/cpu/pred/tournament.hh |    2 -
 9 files changed, 142 insertions(+), 170 deletions(-)

diffs (truncated from 457 to 300 lines):

diff -r c2e1ead33662 -r f94c14fd6561 src/cpu/minor/fetch2.cc
--- a/src/cpu/minor/fetch2.cc   Wed Dec 21 15:06:13 2016 -0600
+++ b/src/cpu/minor/fetch2.cc   Wed Dec 21 15:07:16 2016 -0600
@@ -152,6 +152,10 @@
         DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", *inst);
         branchPredictor.squash(inst->id.fetchSeqNum,
             branch.target, true, inst->id.threadId);
+        // Update after squashing to accomodate O3CPU
+        // using the branch prediction code.
+        branchPredictor.update(inst->id.fetchSeqNum,
+            inst->id.threadId);
         break;
       case BranchData::CorrectlyPredictedBranch:
         /* Predicted taken, was taken */
@@ -164,6 +168,10 @@
         DPRINTF(Branch, "Branch mis-predicted inst: %s\n", *inst);
         branchPredictor.squash(inst->id.fetchSeqNum,
             branch.target /* Not used */, false, inst->id.threadId);
+        // Update after squashing to accomodate O3CPU
+        // using the branch prediction code.
+        branchPredictor.update(inst->id.fetchSeqNum,
+            inst->id.threadId);
         break;
       case BranchData::BadlyPredictedBranchTarget:
         /* Predicted taken, was taken but to a different target */
diff -r c2e1ead33662 -r f94c14fd6561 src/cpu/pred/2bit_local.cc
--- a/src/cpu/pred/2bit_local.cc        Wed Dec 21 15:06:13 2016 -0600
+++ b/src/cpu/pred/2bit_local.cc        Wed Dec 21 15:07:16 2016 -0600
@@ -123,6 +123,12 @@
     assert(bp_history == NULL);
     unsigned local_predictor_idx;
 
+    // No state to restore, and we do not update on the wrong
+    // path.
+    if (squashed) {
+        return;
+    }
+
     // Update the local predictor.
     local_predictor_idx = getLocalIndex(branch_addr);
 
diff -r c2e1ead33662 -r f94c14fd6561 src/cpu/pred/2bit_local.hh
--- a/src/cpu/pred/2bit_local.hh        Wed Dec 21 15:06:13 2016 -0600
+++ b/src/cpu/pred/2bit_local.hh        Wed Dec 21 15:07:16 2016 -0600
@@ -94,9 +94,6 @@
     void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
                 bool squashed);
 
-    void retireSquashed(ThreadID tid, void *bp_history)
-    { assert(bp_history == NULL); }
-
     void squash(ThreadID tid, void *bp_history)
     { assert(bp_history == NULL); }
 
diff -r c2e1ead33662 -r f94c14fd6561 src/cpu/pred/bi_mode.cc
--- a/src/cpu/pred/bi_mode.cc   Wed Dec 21 15:06:13 2016 -0600
+++ b/src/cpu/pred/bi_mode.cc   Wed Dec 21 15:07:16 2016 -0600
@@ -162,78 +162,69 @@
 BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken, void *bpHistory,
                  bool squashed)
 {
-    if (bpHistory) {
-        BPHistory *history = static_cast<BPHistory*>(bpHistory);
+    assert(bpHistory);
 
-        unsigned choiceHistoryIdx = ((branchAddr >> instShiftAmt)
-                                    & choiceHistoryMask);
-        unsigned globalHistoryIdx = (((branchAddr >> instShiftAmt)
-                                    ^ history->globalHistoryReg)
-                                    & globalHistoryMask);
+    BPHistory *history = static_cast<BPHistory*>(bpHistory);
 
-        assert(choiceHistoryIdx < choicePredictorSize);
-        assert(globalHistoryIdx < globalPredictorSize);
+    // We do not update the counters speculatively on a squash.
+    // We just restore the global history register.
+    if (squashed) {
+        globalHistoryReg[tid] = (history->globalHistoryReg << 1) | taken;
+        return;
+    }
 
-        if (history->takenUsed) {
-            // if the taken array's prediction was used, update it
-            if (taken) {
-                takenCounters[globalHistoryIdx].increment();
-            } else {
-                takenCounters[globalHistoryIdx].decrement();
-            }
+    unsigned choiceHistoryIdx = ((branchAddr >> instShiftAmt)
+                                & choiceHistoryMask);
+    unsigned globalHistoryIdx = (((branchAddr >> instShiftAmt)
+                                ^ history->globalHistoryReg)
+                                & globalHistoryMask);
+
+    assert(choiceHistoryIdx < choicePredictorSize);
+    assert(globalHistoryIdx < globalPredictorSize);
+
+    if (history->takenUsed) {
+        // if the taken array's prediction was used, update it
+        if (taken) {
+            takenCounters[globalHistoryIdx].increment();
         } else {
-            // if the not-taken array's prediction was used, update it
-            if (taken) {
-                notTakenCounters[globalHistoryIdx].increment();
-            } else {
-                notTakenCounters[globalHistoryIdx].decrement();
-            }
+            takenCounters[globalHistoryIdx].decrement();
         }
+    } else {
+        // if the not-taken array's prediction was used, update it
+        if (taken) {
+            notTakenCounters[globalHistoryIdx].increment();
+        } else {
+            notTakenCounters[globalHistoryIdx].decrement();
+        }
+    }
 
-        if (history->finalPred == taken) {
-            /* If the final prediction matches the actual branch's
-             * outcome and the choice predictor matches the final
-             * outcome, we update the choice predictor, otherwise it
-             * is not updated. While the designers of the bi-mode
-             * predictor don't explicity say why this is done, one
-             * can infer that it is to preserve the choice predictor's
-             * bias with respect to the branch being predicted; afterall,
-             * the whole point of the bi-mode predictor is to identify the
-             * atypical case when a branch deviates from its bias.
-             */
-            if (history->finalPred == history->takenUsed) {
-                if (taken) {
-                    choiceCounters[choiceHistoryIdx].increment();
-                } else {
-                    choiceCounters[choiceHistoryIdx].decrement();
-                }
-            }
-        } else {
-            // always update the choice predictor on an incorrect prediction
+    if (history->finalPred == taken) {
+       /* If the final prediction matches the actual branch's
+        * outcome and the choice predictor matches the final
+        * outcome, we update the choice predictor, otherwise it
+        * is not updated. While the designers of the bi-mode
+        * predictor don't explicity say why this is done, one
+        * can infer that it is to preserve the choice predictor's
+        * bias with respect to the branch being predicted; afterall,
+        * the whole point of the bi-mode predictor is to identify the
+        * atypical case when a branch deviates from its bias.
+        */
+        if (history->finalPred == history->takenUsed) {
             if (taken) {
                 choiceCounters[choiceHistoryIdx].increment();
             } else {
                 choiceCounters[choiceHistoryIdx].decrement();
             }
         }
-
-        if (squashed) {
-            if (taken) {
-                globalHistoryReg[tid] = (history->globalHistoryReg << 1) | 1;
-            } else {
-                globalHistoryReg[tid] = (history->globalHistoryReg << 1);
-            }
-            globalHistoryReg[tid] &= historyRegisterMask;
+    } else {
+        // always update the choice predictor on an incorrect prediction
+        if (taken) {
+            choiceCounters[choiceHistoryIdx].increment();
         } else {
-            delete history;
+            choiceCounters[choiceHistoryIdx].decrement();
         }
     }
-}
 
-void
-BiModeBP::retireSquashed(ThreadID tid, void *bp_history)
-{
-    BPHistory *history = static_cast<BPHistory*>(bp_history);
     delete history;
 }
 
diff -r c2e1ead33662 -r f94c14fd6561 src/cpu/pred/bi_mode.hh
--- a/src/cpu/pred/bi_mode.hh   Wed Dec 21 15:06:13 2016 -0600
+++ b/src/cpu/pred/bi_mode.hh   Wed Dec 21 15:07:16 2016 -0600
@@ -63,7 +63,6 @@
     void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history);
     void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
                 bool squashed);
-    void retireSquashed(ThreadID tid, void *bp_history);
     unsigned getGHR(ThreadID tid, void *bp_history) const;
 
   private:
diff -r c2e1ead33662 -r f94c14fd6561 src/cpu/pred/bpred_unit.cc
--- a/src/cpu/pred/bpred_unit.cc        Wed Dec 21 15:06:13 2016 -0600
+++ b/src/cpu/pred/bpred_unit.cc        Wed Dec 21 15:07:16 2016 -0600
@@ -330,13 +330,9 @@
     while (!predHist[tid].empty() &&
            predHist[tid].back().seqNum <= done_sn) {
         // Update the branch predictor with the correct results.
-        if (!predHist[tid].back().wasSquashed) {
-            update(tid, predHist[tid].back().pc,
-                        predHist[tid].back().predTaken,
-                        predHist[tid].back().bpHistory, false);
-        } else {
-            retireSquashed(tid, predHist[tid].back().bpHistory);
-        }
+        update(tid, predHist[tid].back().pc,
+                    predHist[tid].back().predTaken,
+                    predHist[tid].back().bpHistory, false);
 
         predHist[tid].pop_back();
     }
@@ -430,12 +426,23 @@
                     tid, hist_it->seqNum);
         }
 
-        // Have to get GHR here because the update deletes bpHistory
+        // Get the underlying Global History Register
         unsigned ghr = getGHR(tid, hist_it->bpHistory);
 
+        // There are separate functions for in-order and out-of-order
+        // branch prediction, but not for update. Therefore, this
+        // call should take into account that the mispredicted branch may
+        // be on the wrong path (i.e., OoO execution), and that the counter
+        // counter table(s) should not be updated. Thus, this call should
+        // restore the state of the underlying predictor, for instance the
+        // local/global histories. The counter tables will be updated when
+        // the branch actually commits.
+
+        // Remember the correct direction for the update at commit.
+        pred_hist.front().predTaken = actually_taken;
+
         update(tid, (*hist_it).pc, actually_taken,
                pred_hist.front().bpHistory, true);
-        hist_it->wasSquashed = true;
 
         if (actually_taken) {
             if (hist_it->wasReturn && !hist_it->usedRAS) {
diff -r c2e1ead33662 -r f94c14fd6561 src/cpu/pred/bpred_unit.hh
--- a/src/cpu/pred/bpred_unit.hh        Wed Dec 21 15:06:13 2016 -0600
+++ b/src/cpu/pred/bpred_unit.hh        Wed Dec 21 15:07:16 2016 -0600
@@ -179,14 +179,6 @@
      */
     virtual void update(ThreadID tid, Addr instPC, bool taken,
                         void *bp_history, bool squashed) = 0;
-     /**
-     * Deletes the associated history with a branch, performs no predictor
-     * updates.  Used for branches that mispredict and update tables but
-     * are still speculative and later retire.
-     * @param bp_history History to delete associated with this predictor
-     */
-    virtual void retireSquashed(ThreadID tid, void *bp_history) = 0;
-
     /**
      * Updates the BTB with the target of a branch.
      * @param inst_PC The branch's PC that will be updated.
@@ -211,7 +203,7 @@
                          ThreadID _tid)
             : seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0),
               RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), 
pushedRAS(0),
-              wasCall(0), wasReturn(0), wasSquashed(0), wasIndirect(0)
+              wasCall(0), wasReturn(0), wasIndirect(0)
         {}
 
         bool operator==(const PredictorHistory &entry) const {
@@ -254,9 +246,6 @@
         /** Whether or not the instruction was a return. */
         bool wasReturn;
 
-        /** Whether this instruction has already mispredicted/updated bp */
-        bool wasSquashed;
-
         /** Wether this instruction was an indirect branch */
         bool wasIndirect;
     };
diff -r c2e1ead33662 -r f94c14fd6561 src/cpu/pred/tournament.cc
--- a/src/cpu/pred/tournament.cc        Wed Dec 21 15:06:13 2016 -0600
+++ b/src/cpu/pred/tournament.cc        Wed Dec 21 15:07:16 2016 -0600
@@ -267,100 +267,77 @@
 TournamentBP::update(ThreadID tid, Addr branch_addr, bool taken,
                      void *bp_history, bool squashed)
 {
-    unsigned local_history_idx;
-    unsigned local_predictor_idx M5_VAR_USED;
-    unsigned local_predictor_hist;
+    assert(bp_history);
 
-    // Get the local predictor's current prediction
-    local_history_idx = calcLocHistIdx(branch_addr);
-    local_predictor_hist = localHistoryTable[local_history_idx];
-    local_predictor_idx = local_predictor_hist & localPredictorMask;
+    BPHistory *history = static_cast<BPHistory *>(bp_history);
 
-    if (bp_history) {
-        BPHistory *history = static_cast<BPHistory *>(bp_history);
-        // Update may also be called if the Branch target is incorrect even if
-        // the prediction is correct. In that case do not update the counters.
-        bool historyPred = false;
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to