Jairo Balart has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/15335

Change subject: cpu: ITTAGE indirect branch predictor model
......................................................................

cpu: ITTAGE indirect branch predictor model

Change-Id: Ie0ee7fe31ffe5177042ffddd61d2099bcf14b9cb
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/SConscript
M src/cpu/pred/bpred_unit.cc
M src/cpu/pred/indirect.cc
M src/cpu/pred/indirect.hh
M src/cpu/pred/indirect_base.hh
A src/cpu/pred/ittage.cc
A src/cpu/pred/ittage.hh
8 files changed, 846 insertions(+), 4 deletions(-)



diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py
index 1e37ab3..040a908 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -423,3 +423,16 @@

     logInb = 7

+class ITTAGE(IndirectPredictorBase):
+    type = 'ITTAGE'
+    cxx_class = 'ITTAGE'
+    cxx_header = "cpu/pred/ittage.hh"
+
+    nHistoryTables = Param.Unsigned(15, "Number of history tables")
+    minHist = Param.Unsigned(8, "Minimum history size")
+    maxHist = Param.Unsigned(2000, "Maximum history size")
+    histBufferSize = Param.Unsigned(4096,
+            "A large number to track all branch histories")
+    histLengths = VectorParam.Int(
+        [0, 0, 10, 16, 27, 44, 60, 96, 109, 219, 449, 487, 714, 1313, 2146,
+         3881], "History lengths")
diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript
index a30ad02..07f82cc 100644
--- a/src/cpu/pred/SConscript
+++ b/src/cpu/pred/SConscript
@@ -41,6 +41,7 @@
 Source('btb.cc')
 Source('indirect.cc')
 Source('indirect_base.cc')
+Source('ittage.cc')
 Source('ras.cc')
 Source('tournament.cc')
 Source ('bi_mode.cc')
diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc
index d8a268a..7edcfe7 100644
--- a/src/cpu/pred/bpred_unit.cc
+++ b/src/cpu/pred/bpred_unit.cc
@@ -277,7 +277,8 @@
                 predict_record.wasIndirect = true;
                 ++indirectLookups;
                 //Consult indirect predictor on indirect control
-                if (iPred->lookup(pc.instAddr(), target, tid)) {
+                if (iPred->lookup(pc.instAddr(), target, tid,
+                    indirect_history)) {
                     // Indirect predictor hit
                     ++indirectHits;
                     DPRINTF(Branch, "[tid:%i]: Instruction %s predicted "
@@ -297,6 +298,9 @@
                 }
iPred->recordIndirect(pc.instAddr(), target.instAddr(), seqNum,
                         tid);
+                iPred->historyUpdate(tid, pc.instAddr(), pred_taken,
+                                     indirect_history, inst,
+                                     target.instAddr());
             }
         }
     } else {
diff --git a/src/cpu/pred/indirect.cc b/src/cpu/pred/indirect.cc
index be9a59d..1fb2db1 100644
--- a/src/cpu/pred/indirect.cc
+++ b/src/cpu/pred/indirect.cc
@@ -77,7 +77,7 @@

 bool
 IndirectPredictor::lookup(Addr br_addr, TheISA::PCState& target,
-    ThreadID tid)
+    ThreadID tid, void *& bp_history)
 {
     Addr set_index = getSetIndex(br_addr, threadInfo[tid].ghr, tid);
     Addr tag = getTag(br_addr);
diff --git a/src/cpu/pred/indirect.hh b/src/cpu/pred/indirect.hh
index 37442e6..a9d98df 100644
--- a/src/cpu/pred/indirect.hh
+++ b/src/cpu/pred/indirect.hh
@@ -43,7 +43,8 @@
 {
   public:
     IndirectPredictor(const IndirectPredictorParams * params);
-    bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid);
+    bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid,
+                void *& bp_history);
     void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
                         ThreadID tid);
     void commit(InstSeqNum seq_num, ThreadID tid, void * indirect_history);
diff --git a/src/cpu/pred/indirect_base.hh b/src/cpu/pred/indirect_base.hh
index e8ffcf6..c4e9d01 100644
--- a/src/cpu/pred/indirect_base.hh
+++ b/src/cpu/pred/indirect_base.hh
@@ -49,7 +49,7 @@
     }

     virtual bool lookup(Addr br_addr, TheISA::PCState& br_target,
-                        ThreadID tid) = 0;
+                        ThreadID tid, void *& bp_history) = 0;
     virtual void recordIndirect(Addr br_addr, Addr tgt_addr,
                                 InstSeqNum seq_num, ThreadID tid) = 0;
     virtual void commit(InstSeqNum seq_num, ThreadID tid,
@@ -64,6 +64,12 @@
                                            bool actually_taken) = 0;
     virtual void deleteDirectionInfo(ThreadID tid,
                                      void * indirect_history) = 0;
+
+    virtual void historyUpdate(ThreadID tid, Addr branch_pc, bool taken,
+ void * bp_history, const StaticInstPtr &inst,
+                               Addr target)
+    {
+    }
 };

 #endif // __CPU_PRED_INDIRECT_BASE_HH__
diff --git a/src/cpu/pred/ittage.cc b/src/cpu/pred/ittage.cc
new file mode 100644
index 0000000..ecbdd4c
--- /dev/null
+++ b/src/cpu/pred/ittage.cc
@@ -0,0 +1,575 @@
+#include "cpu/pred/ittage.hh"
+
+#include "base/random.hh"
+#include "cpu/static_inst.hh"
+
+ITTAGE::ITTAGE(const ITTAGEParams * params)
+    : IndirectPredictorBase(params),
+      threadHistory(params->numThreads),
+      useAltOnNA(0),
+      seed(0),
+      ptIumRetire(0),
+      ptIumFetch(0),
+      nHistoryTables(params->nHistoryTables),
+      histLengths(params->histLengths)
+{
+    for (auto & history : threadHistory) {
+        for (int i = 0; i < HISTBUFFERLENGTH; i++) {
+            history.ghist[0] = 0;
+        }
+    }
+
+    logTagTableSizes.resize(nHistoryTables + 1);
+    logTagTableSizes[0] = LOGG;
+    logTagTableSizes[1] = LOGG;
+    logTagTableSizes[STEP1] = LOGG;
+    logTagTableSizes[STEP2] = LOGG - 1;
+
+    // Grouped together with T1 4K entries
+    for (int i = 2; i <= STEP1 - 1; i++) {
+        logTagTableSizes[i] = logTagTableSizes[1] - 3;
+    }
+
+    // Grouped together with T4 4K entries
+    for (int i = STEP1 + 1; i <= STEP2 - 1; i++) {
+        logTagTableSizes[i] = logTagTableSizes[STEP1] - 3;
+    }
+
+    // Grouped together with T12 2K entries
+    for (int i = STEP2 + 1; i <= nHistoryTables; i++) {
+        logTagTableSizes[i] = logTagTableSizes[STEP2] - 3;
+    }
+
+    gtable = new ITTageEntry * [nHistoryTables + 1];
+    gtable[0] = new ITTageEntry[1 << logTagTableSizes[0]];
+    gtable[1] = new ITTageEntry[1 << logTagTableSizes[1]];
+    gtable[STEP1] = new ITTageEntry[1 << logTagTableSizes[STEP1]];
+    gtable[STEP2] = new ITTageEntry[1 << logTagTableSizes[STEP2]];
+    tagTableTagWidths.resize(nHistoryTables + 1);
+    tagTableTagWidths[0] = 0; // T0 is tagless
+
+    for (int i = 1; i <= STEP1 - 1; i++) {
+        gtable[i] = gtable[1];
+        tagTableTagWidths[i] = 9;
+    }
+
+    for (int i = STEP1; i <= STEP2 - 1; i++) {
+        gtable[i] = gtable[STEP1];
+        tagTableTagWidths[i] = 13;
+    }
+
+    for (int i = STEP2; i <= nHistoryTables; i++) {
+        gtable[i] = gtable[STEP2];
+        tagTableTagWidths[i] = 15;
+    }
+
+    for (auto & history : threadHistory) {
+        // Initialisation of index and tag computation functions
+ history.fetchComputeIndices = new FoldedHistorytmp[nHistoryTables + 1]; + history.fetchComputeTags[0] = new FoldedHistorytmp[nHistoryTables + 1]; + history.fetchComputeTags[1] = new FoldedHistorytmp[nHistoryTables + 1];
+
+        for (int i = 1; i <= nHistoryTables; i++) {
+            history.fetchComputeIndices[i].init(histLengths[i],
+                                                logTagTableSizes[i]);
+            history.fetchComputeTags[0][i].init(
+                history.fetchComputeIndices[i].origLength,
+                tagTableTagWidths[i]);
+            history.fetchComputeTags[1][i].init(
+                history.fetchComputeIndices[i].origLength,
+                tagTableTagWidths[i] - 1);
+        }
+
+        history.fetchPtGhist = 0;
+        history.retireComputeIndices =
+            new FoldedHistorytmp[nHistoryTables + 1];
+        history.retireComputeTags[0] =
+            new FoldedHistorytmp[nHistoryTables + 1];
+        history.retireComputeTags[1] =
+            new FoldedHistorytmp[nHistoryTables + 1];
+
+        for (int i = 1; i <= nHistoryTables; i++) {
+            history.retireComputeIndices[i].init(histLengths[i],
+                                                 logTagTableSizes[i]);
+            history.retireComputeTags[0][i].init(
+                history.retireComputeIndices[i].origLength,
+                tagTableTagWidths[i]);
+            history.retireComputeTags[1][i].init(
+                history.retireComputeIndices[i].origLength,
+                tagTableTagWidths[i] - 1);
+        }
+
+        history.retirePtGhist = 0;
+    }
+
+    regionTable = new RegionEntry[128];
+    IUMTable = new IUMEntry[1 << LOGSPEC];
+    tableIndices = new int [nHistoryTables + 1];
+    tableTags = new int [nHistoryTables + 1];
+}
+
+void
+ITTAGE::updateDirectionInfo(ThreadID tid, bool taken, void *& indirect_history)
+{
+    ITTageBranchInfo * bi = new ITTageBranchInfo(nHistoryTables + 1);
+    indirect_history = (void *)(bi);
+    bi->taken = taken;
+}
+
+bool
+ITTAGE::lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid,
+               void *& bp_history)
+{
+    br_target = predict(tid, br_addr, bp_history);
+    return br_target != 0;
+}
+
+Addr
+ITTAGE::predict(ThreadID tid, Addr branch_pc, void *& b)
+{
+    //ITTageBranchInfo * bi = new ITTageBranchInfo(nHistoryTables + 1);
+    //b = (void *)(bi);
+    ITTageBranchInfo * bi = static_cast<ITTageBranchInfo *>(b);
+    calculateIndicesAndTags(tid, branch_pc, bi, true);
+    tagePredict(tid, branch_pc, bi);
+    // Check IUM table
+    Addr pred = predictIUM(bi);
+    return pred;
+}
+
+void
+ITTAGE::tagePredict(ThreadID tid, Addr branch_pc, ITTageBranchInfo * bi)
+{
+    // Look for the bank with longest matching history
+    for (int i = nHistoryTables; i >= 0; i--) {
+        if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
+            bi->hitBank = i;
+            break;
+        }
+    }
+
+    // Look for the alternate bank
+    for (int i = bi->hitBank - 1; i >= 0; i--) {
+        if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
+            bi->altBank = i;
+            break;
+        }
+    }
+
+    // Computes the prediction and the alternate prediction
+    if (bi->altBank >= 0) {
+ ITTageEntry * table = static_cast<ITTageEntry *>(gtable[bi->altBank]);
+        bi->altTarget = table[tableIndices[bi->altBank]].target;
+    }
+
+    ITTageEntry * table = static_cast<ITTageEntry *>(gtable[bi->hitBank]);
+    bi->predTarget = table[tableIndices[bi->hitBank]].target;
+    // Proceed with the indirection through the target region table
+    bi->altTarget = (bi->altTarget & ((1 << 18) - 1)) +
+ (regionTable[(bi->altTarget >> 18) & 127].region << 18);
+    bi->predTarget = (bi->predTarget & ((1 << 18) - 1)) +
+ (regionTable[(bi->predTarget >> 18) & 127].region << 18);
+    bi->longestMatchPredTarget = bi->predTarget;
+
+    // If the entry is recognized as a newly allocated entry and useAltOnNA
+    // is positive use the alternate prediction
+    if (bi->altBank >= 0)
+ if ((useAltOnNA >= 0) && (table[tableIndices[bi->hitBank]].ctr == 0)) {
+            bi->predTarget = bi->altTarget;
+        }
+
+    bi->branchPC = branch_pc;
+}
+
+void
+ITTAGE::calculateIndicesAndTags(ThreadID tid, Addr branch_pc,
+                                ITTageBranchInfo * bi, bool at_fetch)
+{
+    // Computes the table addresses and the partial tags
+    for (int i = 0; i <= nHistoryTables; i++) {
+        tableIndices[i] = gindex(tid, branch_pc, i, at_fetch);
+        tableTags[i] = gtag(tid, branch_pc, i, at_fetch);
+    }
+
+    for (int i = 2; i <= STEP1 - 1; i++) {
+        tableIndices[i] = ((tableIndices[1] & 7) ^ (i - 1)) +
+                          (tableIndices[i] << 3);
+    }
+
+    for (int i = STEP1 + 1; i <= STEP2 - 1; i++) {
+        tableIndices[i] = ((tableIndices[STEP1] & 7) ^ (i - STEP1)) +
+                          (tableIndices[i] << 3);
+    }
+
+    for (int i = STEP2 + 1; i <= nHistoryTables; i++) {
+        tableIndices[i] = ((tableIndices[STEP2] & 7) ^ (i - STEP2)) +
+                          (tableIndices[i] << 3);
+    }
+
+    tableTags[0] = 0;
+    tableIndices[0] = branch_pc & ((1 << logTagTableSizes[0]) - 1);
+
+    for (int i = 0; i <= nHistoryTables; i++) {
+        bi->tableIndices[i] = tableIndices[i];
+        bi->tableTags[i] = tableTags[i];
+    }
+}
+
+Addr
+ITTAGE::predictIUM(ITTageBranchInfo * bi)
+{
+    int ium_tag = (bi->hitBank) + (tableIndices[bi->hitBank] << 4);
+ int min = (ptIumRetire > ptIumFetch + 8) ? ptIumFetch + 8 : ptIumRetire;
+
+    for (int i = ptIumFetch; i < min; i++) {
+        if (IUMTable[i & ((1 << LOGSPEC) - 1)].tag == ium_tag) {
+            return IUMTable[i & ((1 << LOGSPEC) - 1)].pred;
+        }
+    }
+
+    return bi->predTarget;
+}
+
+void
+ITTAGE::recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
+                       ThreadID tid)
+{
+    HistoryEntry entry(br_addr, tgt_addr, seq_num);
+    threadHistory[tid].pathHist.push_back(entry);
+}
+
+void
+ITTAGE::commit(InstSeqNum seq_num, ThreadID tid, void * indirect_history)
+{
+ ITTageBranchInfo * bi = static_cast<ITTageBranchInfo *>(indirect_history);
+    updateBrIndirect(bi->branchPC, 0, bi->taken, bi->pred, tid,
+                     indirect_history);
+    delete bi;
+}
+
+void
+ITTAGE::historyUpdate(ThreadID tid, Addr branch_pc, bool taken,
+                      void * indirect_history, const StaticInstPtr & inst,
+                      Addr target)
+{
+    IUMUpdate(target, indirect_history);
+ ITTageBranchInfo * bi = static_cast<ITTageBranchInfo *>(indirect_history);
+    bi->condBranch = inst->isCondCtrl();
+    // Update fetch histories
+    historyUpdate(tid, branch_pc, taken, indirect_history, target, true);
+}
+
+void
+ITTAGE::IUMUpdate(Addr target, void * b)
+{
+    ITTageBranchInfo * bi = (ITTageBranchInfo *)(b);
+    int ium_tag = (bi->hitBank) + (bi->tableIndices[bi->hitBank] << 4);
+    ptIumFetch--;
+    IUMTable[ptIumFetch & ((1 << LOGSPEC) - 1)].tag = ium_tag;
+    IUMTable[ptIumFetch & ((1 << LOGSPEC) - 1)].pred = target;
+}
+
+void
+ITTAGE::historyUpdate(ThreadID tid, Addr branch_pc, bool taken,
+                      void * b, Addr target, bool at_fetch)
+{
+    ITTageBranchInfo * bi = (ITTageBranchInfo *)(b);
+    bi->taken = taken;
+    ThreadHistory & thread_history = threadHistory[tid];
+    int Y;
+    FoldedHistorytmp * H;
+    FoldedHistorytmp * G;
+    FoldedHistorytmp * J;
+
+    if (at_fetch) {
+        Y = thread_history.fetchPtGhist;
+        H = thread_history.fetchComputeIndices;
+        G = thread_history.fetchComputeTags[0];
+        J = thread_history.fetchComputeTags[1];
+    } else {
+        Y = thread_history.retirePtGhist;
+        H = thread_history.retireComputeIndices;
+        G = thread_history.retireComputeTags[0];
+        J = thread_history.retireComputeTags[1];
+    }
+
+    Addr path = ((target ^ (target >> 3) ^ branch_pc));
+
+    if (bi->condBranch) {
+        path = taken;
+    }
+
+    int maxt = 10;
+
+    for (int t = 0; t < maxt; t++) {
+        bool P = (path & 1);
+        path >>= 1;
+        // Update history
+        Y--;
+        thread_history.ghist[Y & (HISTBUFFERLENGTH - 1)] = P;
+
+        // Prepare next index and tag computations for user branchs
+        for (int i = 1; i <= nHistoryTables; i++) {
+            if (at_fetch) {
+                bi->ci[i]  = H[i].comp;
+                bi->ct0[i] = G[i].comp;
+                bi->ct0[i] = J[i].comp;
+            }
+
+            H[i].update(thread_history.ghist, Y);
+            G[i].update(thread_history.ghist, Y);
+            J[i].update(thread_history.ghist, Y);
+        }
+    }
+}
+
+// gindex computes a full hash of pc and ghist
+int
+ITTAGE::gindex(ThreadID tid, Addr pc, int bank, bool at_fetch)
+{
+    FoldedHistorytmp * compute_indices =
+        at_fetch ?
+        threadHistory[tid].fetchComputeIndices :
+        threadHistory[tid].retireComputeIndices;
+    int index = pc ^ (pc >> (abs(logTagTableSizes[bank] - bank) + 1)) ^
+                compute_indices[bank].comp;
+    return (index & ((1 << (logTagTableSizes[bank])) - 1));
+}
+
+// Tag computation
+uint16_t
+ITTAGE::gtag(ThreadID tid, unsigned int pc, int bank, bool at_fetch)
+{
+    FoldedHistorytmp * compute_tags[2];
+
+    if (at_fetch) {
+        compute_tags[0] = threadHistory[tid].fetchComputeTags[0];
+        compute_tags[1] = threadHistory[tid].fetchComputeTags[1];
+    } else {
+        compute_tags[0] = threadHistory[tid].retireComputeTags[0];
+        compute_tags[1] = threadHistory[tid].retireComputeTags[1];
+    }
+
+    int tag = pc ^ compute_tags[0][bank].comp ^
+              (compute_tags[1][bank].comp << 1);
+    return (tag & ((1 << tagTableTagWidths[bank]) - 1));
+}
+
+int
+ITTAGE::getRandom() const
+{
+    return 1; // TODO - REMOVE ME after correlation...
+    // return random_mt.random<int>();
+}
+
+// Predictor update
+void
+ITTAGE::updateBrIndirect(Addr branch_pc, uint16_t br_type, bool taken,
+ Addr target, ThreadID tid, void * indirect_history)
+{
+    //ITTageBranchInfo * bi2 = (ITTageBranchInfo *)(indirect_history);
+    int nrand = getRandom();
+    ptIumRetire--;
+    // Recompute the prediction by the ITTAGE predictor
+    ITTageBranchInfo * bi = new ITTageBranchInfo(nHistoryTables + 1);
+    calculateIndicesAndTags(tid, branch_pc, bi, false);
+    tagePredict(tid, branch_pc, bi);
+ // Allocation if the Longest Matching entry does not provide the correct
+    // entry
+    bool alloc = (bi->longestMatchPredTarget != target);
+
+    if ((bi->hitBank > 0) & (bi->altBank >= 0)) {
+        // Manage the selection between longest matching and alternate
+        // matching for "pseudo"-newly allocated longest matching entry
+        bool pseudoNewAlloc =
+            (gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr == 0);
+
+        if (pseudoNewAlloc) {
+            if (bi->altTarget) {
+                if (bi->longestMatchPredTarget != bi->altTarget) {
+                    if ((bi->altTarget == target) ||
+                            (bi->longestMatchPredTarget == target)) {
+                        if (bi->altTarget == target) {
+                            if (useAltOnNA < 7) {
+                                useAltOnNA++;
+                            }
+                        } else {
+                            if (useAltOnNA >= -8) {
+                                useAltOnNA--;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    if (alloc) {
+        // Need to compute the target field (Offset + Region pointer)
+        uint64_t region = (target >> 18);
+        int ptRegion = -1;
+
+        // Associative search on the region table
+        for (int i = 0; i < 128; i++)
+            if (regionTable[i].region == region) {
+                ptRegion = i;
+                break;
+            }
+
+        if (ptRegion == -1) {
+            // Miss in the target region table, allocate a free entry
+            for (int i = 0; i < 128; i++)
+                if (regionTable[i].u == 0) {
+                    ptRegion = i;
+                    regionTable[i].region = region;
+                    regionTable[i].u = 1;
+                    break;
+                }
+
+            // A very simple replacement policy
+            if (ptRegion == -1) {
+                for (int i = 0; i < 128; i++) {
+                    regionTable[i].u = 0;
+                }
+
+                ptRegion = 0;
+                regionTable[0].region = region;
+                regionTable[0].u = 1;
+            }
+        }
+
+        int indTarget = (target & ((1 << 18) - 1)) + (ptRegion << 18);
+        // We allocate an entry with a longer history to avoid ping-pong,
+        // we do not choose systematically the next entry, but among the 2
+        // next entries
+        int Y = nrand;
+        int X = bi->hitBank + 1;
+
+        if ((Y & 31) == 0) {
+            X++;
+        }
+
+        int T = 2;
+
+        // Allocating 3 entries on a misprediction just work a little bit
+        // better than a single allocation
+        for (int i = X; i <= nHistoryTables; i += 1) {
+            ITTageEntry * table = static_cast<ITTageEntry *>(gtable[i]);
+
+            if (table[tableIndices[i]].u == 0) {
+                table[tableIndices[i]].tag = tableTags[i];
+                table[tableIndices[i]].target = indTarget;
+                table[tableIndices[i]].ctr = 0;
+                table[tableIndices[i]].u = 0;
+
+                if (tick > 0) {
+                    tick--;
+                }
+
+                if (T == 0) {
+                    break;
+                }
+
+                T--;
+                i += 1;
+            } else {
+                tick++;
+            }
+        }
+    }
+
+    if ((tick >= (1 << LOGTICK))) {
+        tick = 0;
+
+        // Reset the useful bit
+        for (int i = 0; i <= nHistoryTables; i++) {
+            for (int j = 0; j < (1 << logTagTableSizes[i]); j++) {
+                gtable[i][j].u = 0;
+            }
+        }
+    }
+
+    if (bi->longestMatchPredTarget == target) {
+        if (gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr < 3) {
+            gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr++;
+        }
+    } else {
+        if (gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr > 0) {
+            gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr--;
+        } else {
+            // Replace the target field by the new target
+            // Need to compute the target field :-)
+            uint64_t region = (target >> 18);
+            int ptRegion = -1;
+
+            for (int i = 0; i < 128; i++) {
+                if (regionTable[i].region == region) {
+                    ptRegion = i;
+                    break;
+                }
+            }
+
+            if (ptRegion == -1) {
+                for (int i = 0; i < 128; i++) {
+                    if (regionTable[i].u == 0) {
+                        ptRegion = i;
+                        regionTable[i].region = region;
+                        regionTable[i].u = 1;
+                        break;
+                    }
+                }
+
+                // A very simple replacement policy
+                if (ptRegion == -1) {
+                    for (int i = 0; i < 128; i++) {
+                        regionTable[i].u = 0;
+                    }
+
+                    ptRegion = 0;
+                    regionTable[0].region = region;
+                    regionTable[0].u = 1;
+                }
+            }
+
+            int indTarget = (target & ((1 << 18) - 1)) + (ptRegion << 18);
+            ITTageEntry * table =
+                static_cast<ITTageEntry *>(gtable[bi->hitBank]);
+            table[tableIndices[bi->hitBank]].target = indTarget;
+        }
+    }
+
+    // Update the u bit
+    if (bi->hitBank != 0) {
+        if (bi->longestMatchPredTarget != bi->altTarget) {
+            if (bi->longestMatchPredTarget == target) {
+                gtable[bi->hitBank][tableIndices[bi->hitBank]].u = 1;
+            }
+        }
+    }
+
+    // Update retire history path
+    historyUpdate(tid, branch_pc, taken, indirect_history, target, false);
+}
+
+ITTAGE *
+ITTAGEParams::create()
+{
+    return new ITTAGE(this);
+}
+
+
+
+// TODO
+void
+ITTAGE::squash(InstSeqNum seq_num, ThreadID tid)
+{
+}
+
+// TODO
+void
+ITTAGE::recordTarget(InstSeqNum seq_num, const TheISA::PCState & target,
+                     ThreadID tid)
+{
+}
+
+
diff --git a/src/cpu/pred/ittage.hh b/src/cpu/pred/ittage.hh
new file mode 100644
index 0000000..b6d293b
--- /dev/null
+++ b/src/cpu/pred/ittage.hh
@@ -0,0 +1,242 @@
+#ifndef __CPU_PRED_ITTAGE
+#define __CPU_PRED_ITTAGE
+
+#include "base/types.hh"
+#include "cpu/pred/indirect_base.hh"
+#include "params/ITTAGE.hh"
+
+class ITTAGE: public IndirectPredictorBase
+{
+#define STEP1 3
+#define STEP2 11
+#define HISTBUFFERLENGTH 4096 // Size of the history circular buffer
+#define LOGG 12
+#define LOGTICK 6 // For management of the reset of useful counters
+#define LOGSPEC 6
+
+  private:
+
+    // ITTAGE global table entry
+    class ITTageEntry
+    {
+      public:
+        int8_t ctr;
+        uint16_t tag;
+        uint8_t u;
+        uint32_t target; // 25 bits (18 offset + 7-bit region pointer)
+        ITTageEntry() : ctr(0), tag(0), u(0), target(0) {}
+    };
+
+    class FoldedHistorytmp
+    {
+        // This is the cyclic shift register for folding
+        // a long global history into a smaller number of bits;
+        // see P. Michaud's PPM-like predictor at CBP-1
+      public:
+        unsigned comp;
+        int compLength;
+        int origLength;
+        int outpoint;
+
+        FoldedHistorytmp()
+        {
+        }
+
+        void
+        init(int original_length, int compressed_length)
+        {
+            comp = 0;
+            origLength = original_length;
+            compLength = compressed_length;
+            outpoint = origLength % compLength;
+        }
+
+        void
+        update(uint8_t * h, int pt)
+        {
+            comp = (comp << 1) | h[pt & (HISTBUFFERLENGTH - 1)];
+ comp ^= h[(pt + origLength) & (HISTBUFFERLENGTH - 1)] << outpoint;
+            comp ^= (comp >> compLength);
+            comp &= (1 << compLength) - 1;
+        }
+    };
+
+    int tick; // Control counter for the resetting of useful bits
+
+    // Class for storing speculative predictions: i.e. provided by a table
+    // entry that has already provided a still speculative prediction
+    struct IUMEntry {
+      public:
+        int32_t tag;
+        Addr pred;
+        IUMEntry() {}
+    };
+
+    class RegionEntry // ITTAGE target region table entry
+    {
+      public:
+        uint64_t region; // 14 bits (not any more, now 46 bits)
+        int8_t u; // 1 bit
+        RegionEntry() : region(0), u(0) {}
+    };
+
+    struct HistoryEntry {
+        HistoryEntry(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num)
+            : pcAddr(br_addr), targetAddr(tgt_addr), seqNum(seq_num) { }
+        Addr pcAddr;
+        Addr targetAddr;
+        InstSeqNum seqNum;
+    };
+
+    // Keep per-thread histories to
+    // support SMT.
+    class ThreadHistory
+    {
+      public:
+        // Speculative branc history (circular buffer)
+        uint8_t ghist[HISTBUFFERLENGTH];
+
+        // For management at fetch time
+        int fetchPtGhist;
+        FoldedHistorytmp * fetchComputeIndices;
+        FoldedHistorytmp * fetchComputeTags[2];
+
+        // For management at retire time
+        int retirePtGhist;
+        FoldedHistorytmp * retireComputeIndices;
+        FoldedHistorytmp * retireComputeTags[2];
+
+        std::deque<HistoryEntry> pathHist;
+        unsigned headHistEntry;
+    };
+
+    std::vector<ThreadHistory> threadHistory;
+
+    struct ITTageBranchInfo {
+        Addr predTarget; // Prediction
+        Addr altTarget;  // Alternate prediction
+        Addr pred;       // LTTAGE prediction
+        Addr longestMatchPredTarget;
+        int hitBank;
+        int altBank;
+        Addr branchPC;
+        bool taken;
+        bool condBranch;
+
+        // Pointer to dynamically allocated storage
+        // to save table indices and folded histories.
+        // To do one call to new instead of 5.
+        int * storage;
+
+        // Pointers to actual saved array within the dynamically
+        // allocated storage.
+        int * tableIndices;
+        int * tableTags;
+        int * ci;
+        int * ct0;
+        int * ct1;
+
+        ITTageBranchInfo(int sz)
+            : predTarget(0),
+              altTarget(0),
+              pred(0),
+              hitBank(0),
+              altBank(0),
+              branchPC(0),
+              taken(false),
+              condBranch(false)
+        {
+            storage = new int [sz * 5];
+            tableIndices = storage;
+            tableTags = storage + sz;
+            ci = tableTags + sz;
+            ct0 = ci + sz;
+            ct1 = ct0 + sz;
+        }
+    };
+
+    // "Use alternate prediction on weak predictions": a 4-bit counter to
+    // determine whether the newly allocated entries should be considered
+    // as valid or not for delivering the prediction
+    int8_t useAltOnNA;
+
+    // For the pseudo-random number generator
+    int seed;
+
+    // For the IUM
+    int ptIumRetire;
+    int ptIumFetch;
+    IUMEntry * IUMTable;
+
+    // Target region tables
+    RegionEntry * regionTable;
+
+    const unsigned nHistoryTables;
+    std::vector<unsigned> tagTableTagWidths;
+    std::vector<int> logTagTableSizes;
+    ITTageEntry ** gtable;
+    std::vector<int> histLengths;
+    int * tableIndices;
+    int * tableTags;
+
+  public:
+
+    ITTAGE(const ITTAGEParams * params);
+
+    void
+ updateDirectionInfo(ThreadID tid, bool taken, void *& indirect_history);
+    bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid,
+                void *& bp_history);
+    void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
+                        ThreadID tid);
+ void commit(InstSeqNum seq_num, ThreadID tidi, void * indirect_history);
+    void squash(InstSeqNum seq_num, ThreadID tid);
+    void recordTarget(InstSeqNum seq_num, const TheISA::PCState & target,
+                      ThreadID tid);
+    virtual void
+    changeDirectionPrediction(ThreadID tid,
+                              void * indirect_history,
+                              bool actually_taken)
+    {}
+    virtual void
+    deleteDirectionInfo(ThreadID tid,
+                        void * indirect_history)
+    {}
+
+  private:
+
+    int getRandom() const;
+
+    // gindex computes a full hash of pc and ghist
+    int gindex(ThreadID tid, Addr pc, int bank, bool at_fetch);
+
+    // Tag computation
+    uint16_t gtag(ThreadID tid, unsigned int pc, int bank, bool at_fetch);
+
+    void tagePredict(ThreadID tid, Addr branch_pc, ITTageBranchInfo * bi);
+
+    void calculateIndicesAndTags(ThreadID tid, Addr branch_pc,
+                                 ITTageBranchInfo * bi, bool at_fetch);
+
+    Addr predict(ThreadID tid, Addr branch_pc, void *& b);
+
+    Addr predictIUM(ITTageBranchInfo * bi);
+
+    void IUMUpdate(Addr target, void * b);
+
+    // Update fetch histories
+    //void fetchHistoryUpdate(Addr pc, uint16_t br_type, bool taken,
+    //                        Addr target, ThreadID tid);
+    void historyUpdate(ThreadID tid, Addr branch_pc, bool taken,
+                       void * bp_history, const StaticInstPtr & inst,
+                       Addr target) override;
+
+    void historyUpdate(ThreadID tid, Addr branch_pc, bool taken, void * b,
+                       Addr target, bool at_fetch);
+
+    // Predictor update
+ void updateBrIndirect(Addr pc, uint16_t br_type, bool taken, Addr target,
+                          ThreadID tid, void * indirect_history);
+
+};
+#endif // __CPU_PRED_ITTAGE

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/15335
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: Ie0ee7fe31ffe5177042ffddd61d2099bcf14b9cb
Gerrit-Change-Number: 15335
Gerrit-PatchSet: 1
Gerrit-Owner: Jairo Balart <jairo.bal...@metempsy.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to