Jairo Balart has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/15335
Change subject: cpu: ITTAGE indirect branch predictor model
......................................................................
cpu: ITTAGE indirect branch predictor model
Change-Id: Ie0ee7fe31ffe5177042ffddd61d2099bcf14b9cb
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/SConscript
M src/cpu/pred/bpred_unit.cc
M src/cpu/pred/indirect.cc
M src/cpu/pred/indirect.hh
M src/cpu/pred/indirect_base.hh
A src/cpu/pred/ittage.cc
A src/cpu/pred/ittage.hh
8 files changed, 846 insertions(+), 4 deletions(-)
diff --git a/src/cpu/pred/BranchPredictor.py
b/src/cpu/pred/BranchPredictor.py
index 1e37ab3..040a908 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -423,3 +423,16 @@
logInb = 7
+class ITTAGE(IndirectPredictorBase):
+ type = 'ITTAGE'
+ cxx_class = 'ITTAGE'
+ cxx_header = "cpu/pred/ittage.hh"
+
+ nHistoryTables = Param.Unsigned(15, "Number of history tables")
+ minHist = Param.Unsigned(8, "Minimum history size")
+ maxHist = Param.Unsigned(2000, "Maximum history size")
+ histBufferSize = Param.Unsigned(4096,
+ "A large number to track all branch histories")
+ histLengths = VectorParam.Int(
+ [0, 0, 10, 16, 27, 44, 60, 96, 109, 219, 449, 487, 714, 1313, 2146,
+ 3881], "History lengths")
diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript
index a30ad02..07f82cc 100644
--- a/src/cpu/pred/SConscript
+++ b/src/cpu/pred/SConscript
@@ -41,6 +41,7 @@
Source('btb.cc')
Source('indirect.cc')
Source('indirect_base.cc')
+Source('ittage.cc')
Source('ras.cc')
Source('tournament.cc')
Source ('bi_mode.cc')
diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc
index d8a268a..7edcfe7 100644
--- a/src/cpu/pred/bpred_unit.cc
+++ b/src/cpu/pred/bpred_unit.cc
@@ -277,7 +277,8 @@
predict_record.wasIndirect = true;
++indirectLookups;
//Consult indirect predictor on indirect control
- if (iPred->lookup(pc.instAddr(), target, tid)) {
+ if (iPred->lookup(pc.instAddr(), target, tid,
+ indirect_history)) {
// Indirect predictor hit
++indirectHits;
DPRINTF(Branch, "[tid:%i]: Instruction %s predicted "
@@ -297,6 +298,9 @@
}
iPred->recordIndirect(pc.instAddr(), target.instAddr(),
seqNum,
tid);
+ iPred->historyUpdate(tid, pc.instAddr(), pred_taken,
+ indirect_history, inst,
+ target.instAddr());
}
}
} else {
diff --git a/src/cpu/pred/indirect.cc b/src/cpu/pred/indirect.cc
index be9a59d..1fb2db1 100644
--- a/src/cpu/pred/indirect.cc
+++ b/src/cpu/pred/indirect.cc
@@ -77,7 +77,7 @@
bool
IndirectPredictor::lookup(Addr br_addr, TheISA::PCState& target,
- ThreadID tid)
+ ThreadID tid, void *& bp_history)
{
Addr set_index = getSetIndex(br_addr, threadInfo[tid].ghr, tid);
Addr tag = getTag(br_addr);
diff --git a/src/cpu/pred/indirect.hh b/src/cpu/pred/indirect.hh
index 37442e6..a9d98df 100644
--- a/src/cpu/pred/indirect.hh
+++ b/src/cpu/pred/indirect.hh
@@ -43,7 +43,8 @@
{
public:
IndirectPredictor(const IndirectPredictorParams * params);
- bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid);
+ bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid,
+ void *& bp_history);
void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
ThreadID tid);
void commit(InstSeqNum seq_num, ThreadID tid, void * indirect_history);
diff --git a/src/cpu/pred/indirect_base.hh b/src/cpu/pred/indirect_base.hh
index e8ffcf6..c4e9d01 100644
--- a/src/cpu/pred/indirect_base.hh
+++ b/src/cpu/pred/indirect_base.hh
@@ -49,7 +49,7 @@
}
virtual bool lookup(Addr br_addr, TheISA::PCState& br_target,
- ThreadID tid) = 0;
+ ThreadID tid, void *& bp_history) = 0;
virtual void recordIndirect(Addr br_addr, Addr tgt_addr,
InstSeqNum seq_num, ThreadID tid) = 0;
virtual void commit(InstSeqNum seq_num, ThreadID tid,
@@ -64,6 +64,12 @@
bool actually_taken) = 0;
virtual void deleteDirectionInfo(ThreadID tid,
void * indirect_history) = 0;
+
+ virtual void historyUpdate(ThreadID tid, Addr branch_pc, bool taken,
+ void * bp_history, const StaticInstPtr
&inst,
+ Addr target)
+ {
+ }
};
#endif // __CPU_PRED_INDIRECT_BASE_HH__
diff --git a/src/cpu/pred/ittage.cc b/src/cpu/pred/ittage.cc
new file mode 100644
index 0000000..ecbdd4c
--- /dev/null
+++ b/src/cpu/pred/ittage.cc
@@ -0,0 +1,575 @@
+#include "cpu/pred/ittage.hh"
+
+#include "base/random.hh"
+#include "cpu/static_inst.hh"
+
+ITTAGE::ITTAGE(const ITTAGEParams * params)
+ : IndirectPredictorBase(params),
+ threadHistory(params->numThreads),
+ useAltOnNA(0),
+ seed(0),
+ ptIumRetire(0),
+ ptIumFetch(0),
+ nHistoryTables(params->nHistoryTables),
+ histLengths(params->histLengths)
+{
+ for (auto & history : threadHistory) {
+ for (int i = 0; i < HISTBUFFERLENGTH; i++) {
+ history.ghist[0] = 0;
+ }
+ }
+
+ logTagTableSizes.resize(nHistoryTables + 1);
+ logTagTableSizes[0] = LOGG;
+ logTagTableSizes[1] = LOGG;
+ logTagTableSizes[STEP1] = LOGG;
+ logTagTableSizes[STEP2] = LOGG - 1;
+
+ // Grouped together with T1 4K entries
+ for (int i = 2; i <= STEP1 - 1; i++) {
+ logTagTableSizes[i] = logTagTableSizes[1] - 3;
+ }
+
+ // Grouped together with T4 4K entries
+ for (int i = STEP1 + 1; i <= STEP2 - 1; i++) {
+ logTagTableSizes[i] = logTagTableSizes[STEP1] - 3;
+ }
+
+ // Grouped together with T12 2K entries
+ for (int i = STEP2 + 1; i <= nHistoryTables; i++) {
+ logTagTableSizes[i] = logTagTableSizes[STEP2] - 3;
+ }
+
+ gtable = new ITTageEntry * [nHistoryTables + 1];
+ gtable[0] = new ITTageEntry[1 << logTagTableSizes[0]];
+ gtable[1] = new ITTageEntry[1 << logTagTableSizes[1]];
+ gtable[STEP1] = new ITTageEntry[1 << logTagTableSizes[STEP1]];
+ gtable[STEP2] = new ITTageEntry[1 << logTagTableSizes[STEP2]];
+ tagTableTagWidths.resize(nHistoryTables + 1);
+ tagTableTagWidths[0] = 0; // T0 is tagless
+
+ for (int i = 1; i <= STEP1 - 1; i++) {
+ gtable[i] = gtable[1];
+ tagTableTagWidths[i] = 9;
+ }
+
+ for (int i = STEP1; i <= STEP2 - 1; i++) {
+ gtable[i] = gtable[STEP1];
+ tagTableTagWidths[i] = 13;
+ }
+
+ for (int i = STEP2; i <= nHistoryTables; i++) {
+ gtable[i] = gtable[STEP2];
+ tagTableTagWidths[i] = 15;
+ }
+
+ for (auto & history : threadHistory) {
+ // Initialisation of index and tag computation functions
+ history.fetchComputeIndices = new FoldedHistorytmp[nHistoryTables
+ 1];
+ history.fetchComputeTags[0] = new FoldedHistorytmp[nHistoryTables
+ 1];
+ history.fetchComputeTags[1] = new FoldedHistorytmp[nHistoryTables
+ 1];
+
+ for (int i = 1; i <= nHistoryTables; i++) {
+ history.fetchComputeIndices[i].init(histLengths[i],
+ logTagTableSizes[i]);
+ history.fetchComputeTags[0][i].init(
+ history.fetchComputeIndices[i].origLength,
+ tagTableTagWidths[i]);
+ history.fetchComputeTags[1][i].init(
+ history.fetchComputeIndices[i].origLength,
+ tagTableTagWidths[i] - 1);
+ }
+
+ history.fetchPtGhist = 0;
+ history.retireComputeIndices =
+ new FoldedHistorytmp[nHistoryTables + 1];
+ history.retireComputeTags[0] =
+ new FoldedHistorytmp[nHistoryTables + 1];
+ history.retireComputeTags[1] =
+ new FoldedHistorytmp[nHistoryTables + 1];
+
+ for (int i = 1; i <= nHistoryTables; i++) {
+ history.retireComputeIndices[i].init(histLengths[i],
+ logTagTableSizes[i]);
+ history.retireComputeTags[0][i].init(
+ history.retireComputeIndices[i].origLength,
+ tagTableTagWidths[i]);
+ history.retireComputeTags[1][i].init(
+ history.retireComputeIndices[i].origLength,
+ tagTableTagWidths[i] - 1);
+ }
+
+ history.retirePtGhist = 0;
+ }
+
+ regionTable = new RegionEntry[128];
+ IUMTable = new IUMEntry[1 << LOGSPEC];
+ tableIndices = new int [nHistoryTables + 1];
+ tableTags = new int [nHistoryTables + 1];
+}
+
+void
+ITTAGE::updateDirectionInfo(ThreadID tid, bool taken, void *&
indirect_history)
+{
+ ITTageBranchInfo * bi = new ITTageBranchInfo(nHistoryTables + 1);
+ indirect_history = (void *)(bi);
+ bi->taken = taken;
+}
+
+bool
+ITTAGE::lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid,
+ void *& bp_history)
+{
+ br_target = predict(tid, br_addr, bp_history);
+ return br_target != 0;
+}
+
+Addr
+ITTAGE::predict(ThreadID tid, Addr branch_pc, void *& b)
+{
+ //ITTageBranchInfo * bi = new ITTageBranchInfo(nHistoryTables + 1);
+ //b = (void *)(bi);
+ ITTageBranchInfo * bi = static_cast<ITTageBranchInfo *>(b);
+ calculateIndicesAndTags(tid, branch_pc, bi, true);
+ tagePredict(tid, branch_pc, bi);
+ // Check IUM table
+ Addr pred = predictIUM(bi);
+ return pred;
+}
+
+void
+ITTAGE::tagePredict(ThreadID tid, Addr branch_pc, ITTageBranchInfo * bi)
+{
+ // Look for the bank with longest matching history
+ for (int i = nHistoryTables; i >= 0; i--) {
+ if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
+ bi->hitBank = i;
+ break;
+ }
+ }
+
+ // Look for the alternate bank
+ for (int i = bi->hitBank - 1; i >= 0; i--) {
+ if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
+ bi->altBank = i;
+ break;
+ }
+ }
+
+ // Computes the prediction and the alternate prediction
+ if (bi->altBank >= 0) {
+ ITTageEntry * table = static_cast<ITTageEntry
*>(gtable[bi->altBank]);
+ bi->altTarget = table[tableIndices[bi->altBank]].target;
+ }
+
+ ITTageEntry * table = static_cast<ITTageEntry *>(gtable[bi->hitBank]);
+ bi->predTarget = table[tableIndices[bi->hitBank]].target;
+ // Proceed with the indirection through the target region table
+ bi->altTarget = (bi->altTarget & ((1 << 18) - 1)) +
+ (regionTable[(bi->altTarget >> 18) & 127].region <<
18);
+ bi->predTarget = (bi->predTarget & ((1 << 18) - 1)) +
+ (regionTable[(bi->predTarget >> 18) & 127].region <<
18);
+ bi->longestMatchPredTarget = bi->predTarget;
+
+ // If the entry is recognized as a newly allocated entry and useAltOnNA
+ // is positive use the alternate prediction
+ if (bi->altBank >= 0)
+ if ((useAltOnNA >= 0) && (table[tableIndices[bi->hitBank]].ctr ==
0)) {
+ bi->predTarget = bi->altTarget;
+ }
+
+ bi->branchPC = branch_pc;
+}
+
+void
+ITTAGE::calculateIndicesAndTags(ThreadID tid, Addr branch_pc,
+ ITTageBranchInfo * bi, bool at_fetch)
+{
+ // Computes the table addresses and the partial tags
+ for (int i = 0; i <= nHistoryTables; i++) {
+ tableIndices[i] = gindex(tid, branch_pc, i, at_fetch);
+ tableTags[i] = gtag(tid, branch_pc, i, at_fetch);
+ }
+
+ for (int i = 2; i <= STEP1 - 1; i++) {
+ tableIndices[i] = ((tableIndices[1] & 7) ^ (i - 1)) +
+ (tableIndices[i] << 3);
+ }
+
+ for (int i = STEP1 + 1; i <= STEP2 - 1; i++) {
+ tableIndices[i] = ((tableIndices[STEP1] & 7) ^ (i - STEP1)) +
+ (tableIndices[i] << 3);
+ }
+
+ for (int i = STEP2 + 1; i <= nHistoryTables; i++) {
+ tableIndices[i] = ((tableIndices[STEP2] & 7) ^ (i - STEP2)) +
+ (tableIndices[i] << 3);
+ }
+
+ tableTags[0] = 0;
+ tableIndices[0] = branch_pc & ((1 << logTagTableSizes[0]) - 1);
+
+ for (int i = 0; i <= nHistoryTables; i++) {
+ bi->tableIndices[i] = tableIndices[i];
+ bi->tableTags[i] = tableTags[i];
+ }
+}
+
+Addr
+ITTAGE::predictIUM(ITTageBranchInfo * bi)
+{
+ int ium_tag = (bi->hitBank) + (tableIndices[bi->hitBank] << 4);
+ int min = (ptIumRetire > ptIumFetch + 8) ? ptIumFetch + 8 :
ptIumRetire;
+
+ for (int i = ptIumFetch; i < min; i++) {
+ if (IUMTable[i & ((1 << LOGSPEC) - 1)].tag == ium_tag) {
+ return IUMTable[i & ((1 << LOGSPEC) - 1)].pred;
+ }
+ }
+
+ return bi->predTarget;
+}
+
+void
+ITTAGE::recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
+ ThreadID tid)
+{
+ HistoryEntry entry(br_addr, tgt_addr, seq_num);
+ threadHistory[tid].pathHist.push_back(entry);
+}
+
+void
+ITTAGE::commit(InstSeqNum seq_num, ThreadID tid, void * indirect_history)
+{
+ ITTageBranchInfo * bi = static_cast<ITTageBranchInfo
*>(indirect_history);
+ updateBrIndirect(bi->branchPC, 0, bi->taken, bi->pred, tid,
+ indirect_history);
+ delete bi;
+}
+
+void
+ITTAGE::historyUpdate(ThreadID tid, Addr branch_pc, bool taken,
+ void * indirect_history, const StaticInstPtr & inst,
+ Addr target)
+{
+ IUMUpdate(target, indirect_history);
+ ITTageBranchInfo * bi = static_cast<ITTageBranchInfo
*>(indirect_history);
+ bi->condBranch = inst->isCondCtrl();
+ // Update fetch histories
+ historyUpdate(tid, branch_pc, taken, indirect_history, target, true);
+}
+
+void
+ITTAGE::IUMUpdate(Addr target, void * b)
+{
+ ITTageBranchInfo * bi = (ITTageBranchInfo *)(b);
+ int ium_tag = (bi->hitBank) + (bi->tableIndices[bi->hitBank] << 4);
+ ptIumFetch--;
+ IUMTable[ptIumFetch & ((1 << LOGSPEC) - 1)].tag = ium_tag;
+ IUMTable[ptIumFetch & ((1 << LOGSPEC) - 1)].pred = target;
+}
+
+void
+ITTAGE::historyUpdate(ThreadID tid, Addr branch_pc, bool taken,
+ void * b, Addr target, bool at_fetch)
+{
+ ITTageBranchInfo * bi = (ITTageBranchInfo *)(b);
+ bi->taken = taken;
+ ThreadHistory & thread_history = threadHistory[tid];
+ int Y;
+ FoldedHistorytmp * H;
+ FoldedHistorytmp * G;
+ FoldedHistorytmp * J;
+
+ if (at_fetch) {
+ Y = thread_history.fetchPtGhist;
+ H = thread_history.fetchComputeIndices;
+ G = thread_history.fetchComputeTags[0];
+ J = thread_history.fetchComputeTags[1];
+ } else {
+ Y = thread_history.retirePtGhist;
+ H = thread_history.retireComputeIndices;
+ G = thread_history.retireComputeTags[0];
+ J = thread_history.retireComputeTags[1];
+ }
+
+ Addr path = ((target ^ (target >> 3) ^ branch_pc));
+
+ if (bi->condBranch) {
+ path = taken;
+ }
+
+ int maxt = 10;
+
+ for (int t = 0; t < maxt; t++) {
+ bool P = (path & 1);
+ path >>= 1;
+ // Update history
+ Y--;
+ thread_history.ghist[Y & (HISTBUFFERLENGTH - 1)] = P;
+
+ // Prepare next index and tag computations for user branchs
+ for (int i = 1; i <= nHistoryTables; i++) {
+ if (at_fetch) {
+ bi->ci[i] = H[i].comp;
+ bi->ct0[i] = G[i].comp;
+ bi->ct0[i] = J[i].comp;
+ }
+
+ H[i].update(thread_history.ghist, Y);
+ G[i].update(thread_history.ghist, Y);
+ J[i].update(thread_history.ghist, Y);
+ }
+ }
+}
+
+// gindex computes a full hash of pc and ghist
+int
+ITTAGE::gindex(ThreadID tid, Addr pc, int bank, bool at_fetch)
+{
+ FoldedHistorytmp * compute_indices =
+ at_fetch ?
+ threadHistory[tid].fetchComputeIndices :
+ threadHistory[tid].retireComputeIndices;
+ int index = pc ^ (pc >> (abs(logTagTableSizes[bank] - bank) + 1)) ^
+ compute_indices[bank].comp;
+ return (index & ((1 << (logTagTableSizes[bank])) - 1));
+}
+
+// Tag computation
+uint16_t
+ITTAGE::gtag(ThreadID tid, unsigned int pc, int bank, bool at_fetch)
+{
+ FoldedHistorytmp * compute_tags[2];
+
+ if (at_fetch) {
+ compute_tags[0] = threadHistory[tid].fetchComputeTags[0];
+ compute_tags[1] = threadHistory[tid].fetchComputeTags[1];
+ } else {
+ compute_tags[0] = threadHistory[tid].retireComputeTags[0];
+ compute_tags[1] = threadHistory[tid].retireComputeTags[1];
+ }
+
+ int tag = pc ^ compute_tags[0][bank].comp ^
+ (compute_tags[1][bank].comp << 1);
+ return (tag & ((1 << tagTableTagWidths[bank]) - 1));
+}
+
+int
+ITTAGE::getRandom() const
+{
+ return 1; // TODO - REMOVE ME after correlation...
+ // return random_mt.random<int>();
+}
+
+// Predictor update
+void
+ITTAGE::updateBrIndirect(Addr branch_pc, uint16_t br_type, bool taken,
+ Addr target, ThreadID tid, void *
indirect_history)
+{
+ //ITTageBranchInfo * bi2 = (ITTageBranchInfo *)(indirect_history);
+ int nrand = getRandom();
+ ptIumRetire--;
+ // Recompute the prediction by the ITTAGE predictor
+ ITTageBranchInfo * bi = new ITTageBranchInfo(nHistoryTables + 1);
+ calculateIndicesAndTags(tid, branch_pc, bi, false);
+ tagePredict(tid, branch_pc, bi);
+ // Allocation if the Longest Matching entry does not provide the
correct
+ // entry
+ bool alloc = (bi->longestMatchPredTarget != target);
+
+ if ((bi->hitBank > 0) & (bi->altBank >= 0)) {
+ // Manage the selection between longest matching and alternate
+ // matching for "pseudo"-newly allocated longest matching entry
+ bool pseudoNewAlloc =
+ (gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr == 0);
+
+ if (pseudoNewAlloc) {
+ if (bi->altTarget) {
+ if (bi->longestMatchPredTarget != bi->altTarget) {
+ if ((bi->altTarget == target) ||
+ (bi->longestMatchPredTarget == target)) {
+ if (bi->altTarget == target) {
+ if (useAltOnNA < 7) {
+ useAltOnNA++;
+ }
+ } else {
+ if (useAltOnNA >= -8) {
+ useAltOnNA--;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (alloc) {
+ // Need to compute the target field (Offset + Region pointer)
+ uint64_t region = (target >> 18);
+ int ptRegion = -1;
+
+ // Associative search on the region table
+ for (int i = 0; i < 128; i++)
+ if (regionTable[i].region == region) {
+ ptRegion = i;
+ break;
+ }
+
+ if (ptRegion == -1) {
+ // Miss in the target region table, allocate a free entry
+ for (int i = 0; i < 128; i++)
+ if (regionTable[i].u == 0) {
+ ptRegion = i;
+ regionTable[i].region = region;
+ regionTable[i].u = 1;
+ break;
+ }
+
+ // A very simple replacement policy
+ if (ptRegion == -1) {
+ for (int i = 0; i < 128; i++) {
+ regionTable[i].u = 0;
+ }
+
+ ptRegion = 0;
+ regionTable[0].region = region;
+ regionTable[0].u = 1;
+ }
+ }
+
+ int indTarget = (target & ((1 << 18) - 1)) + (ptRegion << 18);
+ // We allocate an entry with a longer history to avoid ping-pong,
+ // we do not choose systematically the next entry, but among the 2
+ // next entries
+ int Y = nrand;
+ int X = bi->hitBank + 1;
+
+ if ((Y & 31) == 0) {
+ X++;
+ }
+
+ int T = 2;
+
+ // Allocating 3 entries on a misprediction just work a little bit
+ // better than a single allocation
+ for (int i = X; i <= nHistoryTables; i += 1) {
+ ITTageEntry * table = static_cast<ITTageEntry *>(gtable[i]);
+
+ if (table[tableIndices[i]].u == 0) {
+ table[tableIndices[i]].tag = tableTags[i];
+ table[tableIndices[i]].target = indTarget;
+ table[tableIndices[i]].ctr = 0;
+ table[tableIndices[i]].u = 0;
+
+ if (tick > 0) {
+ tick--;
+ }
+
+ if (T == 0) {
+ break;
+ }
+
+ T--;
+ i += 1;
+ } else {
+ tick++;
+ }
+ }
+ }
+
+ if ((tick >= (1 << LOGTICK))) {
+ tick = 0;
+
+ // Reset the useful bit
+ for (int i = 0; i <= nHistoryTables; i++) {
+ for (int j = 0; j < (1 << logTagTableSizes[i]); j++) {
+ gtable[i][j].u = 0;
+ }
+ }
+ }
+
+ if (bi->longestMatchPredTarget == target) {
+ if (gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr < 3) {
+ gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr++;
+ }
+ } else {
+ if (gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr > 0) {
+ gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr--;
+ } else {
+ // Replace the target field by the new target
+ // Need to compute the target field :-)
+ uint64_t region = (target >> 18);
+ int ptRegion = -1;
+
+ for (int i = 0; i < 128; i++) {
+ if (regionTable[i].region == region) {
+ ptRegion = i;
+ break;
+ }
+ }
+
+ if (ptRegion == -1) {
+ for (int i = 0; i < 128; i++) {
+ if (regionTable[i].u == 0) {
+ ptRegion = i;
+ regionTable[i].region = region;
+ regionTable[i].u = 1;
+ break;
+ }
+ }
+
+ // A very simple replacement policy
+ if (ptRegion == -1) {
+ for (int i = 0; i < 128; i++) {
+ regionTable[i].u = 0;
+ }
+
+ ptRegion = 0;
+ regionTable[0].region = region;
+ regionTable[0].u = 1;
+ }
+ }
+
+ int indTarget = (target & ((1 << 18) - 1)) + (ptRegion << 18);
+ ITTageEntry * table =
+ static_cast<ITTageEntry *>(gtable[bi->hitBank]);
+ table[tableIndices[bi->hitBank]].target = indTarget;
+ }
+ }
+
+ // Update the u bit
+ if (bi->hitBank != 0) {
+ if (bi->longestMatchPredTarget != bi->altTarget) {
+ if (bi->longestMatchPredTarget == target) {
+ gtable[bi->hitBank][tableIndices[bi->hitBank]].u = 1;
+ }
+ }
+ }
+
+ // Update retire history path
+ historyUpdate(tid, branch_pc, taken, indirect_history, target, false);
+}
+
+ITTAGE *
+ITTAGEParams::create()
+{
+ return new ITTAGE(this);
+}
+
+
+
+// TODO
+void
+ITTAGE::squash(InstSeqNum seq_num, ThreadID tid)
+{
+}
+
+// TODO
+void
+ITTAGE::recordTarget(InstSeqNum seq_num, const TheISA::PCState & target,
+ ThreadID tid)
+{
+}
+
+
diff --git a/src/cpu/pred/ittage.hh b/src/cpu/pred/ittage.hh
new file mode 100644
index 0000000..b6d293b
--- /dev/null
+++ b/src/cpu/pred/ittage.hh
@@ -0,0 +1,242 @@
+#ifndef __CPU_PRED_ITTAGE
+#define __CPU_PRED_ITTAGE
+
+#include "base/types.hh"
+#include "cpu/pred/indirect_base.hh"
+#include "params/ITTAGE.hh"
+
+class ITTAGE: public IndirectPredictorBase
+{
+#define STEP1 3
+#define STEP2 11
+#define HISTBUFFERLENGTH 4096 // Size of the history circular buffer
+#define LOGG 12
+#define LOGTICK 6 // For management of the reset of useful counters
+#define LOGSPEC 6
+
+ private:
+
+ // ITTAGE global table entry
+ class ITTageEntry
+ {
+ public:
+ int8_t ctr;
+ uint16_t tag;
+ uint8_t u;
+ uint32_t target; // 25 bits (18 offset + 7-bit region pointer)
+ ITTageEntry() : ctr(0), tag(0), u(0), target(0) {}
+ };
+
+ class FoldedHistorytmp
+ {
+ // This is the cyclic shift register for folding
+ // a long global history into a smaller number of bits;
+ // see P. Michaud's PPM-like predictor at CBP-1
+ public:
+ unsigned comp;
+ int compLength;
+ int origLength;
+ int outpoint;
+
+ FoldedHistorytmp()
+ {
+ }
+
+ void
+ init(int original_length, int compressed_length)
+ {
+ comp = 0;
+ origLength = original_length;
+ compLength = compressed_length;
+ outpoint = origLength % compLength;
+ }
+
+ void
+ update(uint8_t * h, int pt)
+ {
+ comp = (comp << 1) | h[pt & (HISTBUFFERLENGTH - 1)];
+ comp ^= h[(pt + origLength) & (HISTBUFFERLENGTH - 1)] <<
outpoint;
+ comp ^= (comp >> compLength);
+ comp &= (1 << compLength) - 1;
+ }
+ };
+
+ int tick; // Control counter for the resetting of useful bits
+
+ // Class for storing speculative predictions: i.e. provided by a table
+ // entry that has already provided a still speculative prediction
+ struct IUMEntry {
+ public:
+ int32_t tag;
+ Addr pred;
+ IUMEntry() {}
+ };
+
+ class RegionEntry // ITTAGE target region table entry
+ {
+ public:
+ uint64_t region; // 14 bits (not any more, now 46 bits)
+ int8_t u; // 1 bit
+ RegionEntry() : region(0), u(0) {}
+ };
+
+ struct HistoryEntry {
+ HistoryEntry(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num)
+ : pcAddr(br_addr), targetAddr(tgt_addr), seqNum(seq_num) { }
+ Addr pcAddr;
+ Addr targetAddr;
+ InstSeqNum seqNum;
+ };
+
+ // Keep per-thread histories to
+ // support SMT.
+ class ThreadHistory
+ {
+ public:
+ // Speculative branc history (circular buffer)
+ uint8_t ghist[HISTBUFFERLENGTH];
+
+ // For management at fetch time
+ int fetchPtGhist;
+ FoldedHistorytmp * fetchComputeIndices;
+ FoldedHistorytmp * fetchComputeTags[2];
+
+ // For management at retire time
+ int retirePtGhist;
+ FoldedHistorytmp * retireComputeIndices;
+ FoldedHistorytmp * retireComputeTags[2];
+
+ std::deque<HistoryEntry> pathHist;
+ unsigned headHistEntry;
+ };
+
+ std::vector<ThreadHistory> threadHistory;
+
+ struct ITTageBranchInfo {
+ Addr predTarget; // Prediction
+ Addr altTarget; // Alternate prediction
+ Addr pred; // LTTAGE prediction
+ Addr longestMatchPredTarget;
+ int hitBank;
+ int altBank;
+ Addr branchPC;
+ bool taken;
+ bool condBranch;
+
+ // Pointer to dynamically allocated storage
+ // to save table indices and folded histories.
+ // To do one call to new instead of 5.
+ int * storage;
+
+ // Pointers to actual saved array within the dynamically
+ // allocated storage.
+ int * tableIndices;
+ int * tableTags;
+ int * ci;
+ int * ct0;
+ int * ct1;
+
+ ITTageBranchInfo(int sz)
+ : predTarget(0),
+ altTarget(0),
+ pred(0),
+ hitBank(0),
+ altBank(0),
+ branchPC(0),
+ taken(false),
+ condBranch(false)
+ {
+ storage = new int [sz * 5];
+ tableIndices = storage;
+ tableTags = storage + sz;
+ ci = tableTags + sz;
+ ct0 = ci + sz;
+ ct1 = ct0 + sz;
+ }
+ };
+
+ // "Use alternate prediction on weak predictions": a 4-bit counter to
+ // determine whether the newly allocated entries should be considered
+ // as valid or not for delivering the prediction
+ int8_t useAltOnNA;
+
+ // For the pseudo-random number generator
+ int seed;
+
+ // For the IUM
+ int ptIumRetire;
+ int ptIumFetch;
+ IUMEntry * IUMTable;
+
+ // Target region tables
+ RegionEntry * regionTable;
+
+ const unsigned nHistoryTables;
+ std::vector<unsigned> tagTableTagWidths;
+ std::vector<int> logTagTableSizes;
+ ITTageEntry ** gtable;
+ std::vector<int> histLengths;
+ int * tableIndices;
+ int * tableTags;
+
+ public:
+
+ ITTAGE(const ITTAGEParams * params);
+
+ void
+ updateDirectionInfo(ThreadID tid, bool taken, void *&
indirect_history);
+ bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid,
+ void *& bp_history);
+ void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
+ ThreadID tid);
+ void commit(InstSeqNum seq_num, ThreadID tidi, void *
indirect_history);
+ void squash(InstSeqNum seq_num, ThreadID tid);
+ void recordTarget(InstSeqNum seq_num, const TheISA::PCState & target,
+ ThreadID tid);
+ virtual void
+ changeDirectionPrediction(ThreadID tid,
+ void * indirect_history,
+ bool actually_taken)
+ {}
+ virtual void
+ deleteDirectionInfo(ThreadID tid,
+ void * indirect_history)
+ {}
+
+ private:
+
+ int getRandom() const;
+
+ // gindex computes a full hash of pc and ghist
+ int gindex(ThreadID tid, Addr pc, int bank, bool at_fetch);
+
+ // Tag computation
+ uint16_t gtag(ThreadID tid, unsigned int pc, int bank, bool at_fetch);
+
+ void tagePredict(ThreadID tid, Addr branch_pc, ITTageBranchInfo * bi);
+
+ void calculateIndicesAndTags(ThreadID tid, Addr branch_pc,
+ ITTageBranchInfo * bi, bool at_fetch);
+
+ Addr predict(ThreadID tid, Addr branch_pc, void *& b);
+
+ Addr predictIUM(ITTageBranchInfo * bi);
+
+ void IUMUpdate(Addr target, void * b);
+
+ // Update fetch histories
+ //void fetchHistoryUpdate(Addr pc, uint16_t br_type, bool taken,
+ // Addr target, ThreadID tid);
+ void historyUpdate(ThreadID tid, Addr branch_pc, bool taken,
+ void * bp_history, const StaticInstPtr & inst,
+ Addr target) override;
+
+ void historyUpdate(ThreadID tid, Addr branch_pc, bool taken, void * b,
+ Addr target, bool at_fetch);
+
+ // Predictor update
+ void updateBrIndirect(Addr pc, uint16_t br_type, bool taken, Addr
target,
+ ThreadID tid, void * indirect_history);
+
+};
+#endif // __CPU_PRED_ITTAGE
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/15335
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: Ie0ee7fe31ffe5177042ffddd61d2099bcf14b9cb
Gerrit-Change-Number: 15335
Gerrit-PatchSet: 1
Gerrit-Owner: Jairo Balart <jairo.bal...@metempsy.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev