Pau Cabre has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/15956
Change subject: cpu: Added Bias-Free Neural branch Predictor (32KB)
......................................................................
cpu: Added Bias-Free Neural branch Predictor (32KB)
This is a branch predictor devised by Dibakar Gope and Mikko Lipasti
The original paper of the branch predictor can be found here:
https://www.jilp.org/cbp2014/paper/DibakarGope.pdf
And this is the original implementation:
https://www.jilp.org/cbp2014/code/DibakarGope.tar.gz
Change-Id: Ia159c5e7430a387aa46f33b0ed1503bc515d9c73
Signed-off-by: Pau Cabre <pau.ca...@metempsy.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/SConscript
A src/cpu/pred/bfnp_32KB.cc
A src/cpu/pred/bfnp_32KB.hh
M src/cpu/pred/bpred_unit.cc
M src/cpu/pred/bpred_unit.hh
M src/cpu/pred/loop_predictor.hh
M src/cpu/pred/tage_base.cc
M src/cpu/pred/tage_base.hh
9 files changed, 1,269 insertions(+), 15 deletions(-)
diff --git a/src/cpu/pred/BranchPredictor.py
b/src/cpu/pred/BranchPredictor.py
index b9273b3..4088625 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -424,3 +424,72 @@
logInb = 7
# Loop predictor for BFNP_32KB
class BFNP_32KB_LoopPredictor(LoopPredictor):
    type = 'BFNP_32KB_LoopPredictor'
    cxx_class = 'BFNP_32KB_LoopPredictor'
    cxx_header = "cpu/pred/bfnp_32KB.hh"

    # Overrides of the base LoopPredictor parameters tuned for the
    # 32KB Bias-Free Neural Predictor configuration
    logSizeLoopPred = 6
    useDirectionBit = True
    restrictAllocation = True
    initialLoopAge = 7
    optionalAgeReset = False
    initialLoopIter = 0
    loopTableAgeBits = 3
    loopTableConfidenceBits = 3
    loopTableIterBits = 10
    loopTableTagBits = 10
    withLoopBits = 7

    # This loop predictor uses hashing, but not in all the cases.
    # This is one of the reasons why we need a BFNP_32KB_LoopPredictor
    # derived class (otherwise, the base LoopPredictor class could be enough)
    useHashing = True
+
+
# 32KB Bias-Free Neural branch predictor as described in
# https://www.jilp.org/cbp2014/paper/DibakarGope.pdf
class BFNP_32KB(BranchPredictor):
    type = 'BFNP_32KB'
    cxx_class = 'BFNP_32KB'
    cxx_header = "cpu/pred/bfnp_32KB.hh"

    logBst = Param.Unsigned(13, "Log2 number of Branch Status Table entries")
    fHistLen = Param.Unsigned(128, "Length of the filtered history")
    pHist = Param.Unsigned(65, "Length of the unfiltered path history")
    trainTh = Param.Int(82, "Train threshold")

    # Bin boundaries used when filtering duplicate branch instances
    histLen = VectorParam.Unsigned(
        [64, 80, 96, 112, 128, 160, 192, 256, 320, 416, 512, 1024],
        "History lengths")

    # Typo fixed: "correalte" -> "correlate"
    maxHist = Param.Unsigned(2048,
        "maximum history length the predictor attempts to correlate with")

    loop_predictor = Param.LoopPredictor(
        BFNP_32KB_LoopPredictor(), "Loop predictor")

    wtReg = Param.Unsigned(10, "Log2 number of bias weights")

    wl1 = Param.Unsigned(11,
        "Second dimension size of conventional perceptron weight table")

    wl2 = Param.Unsigned(36, "Number of recency-stack entries")

    # Typo fixed: "numer" -> "number"
    wtRs = Param.Unsigned(15,
        "Log2 number of one-dimensional weight table entries")

    paShiftHash = Param.Unsigned(2, "Number of bits to shift for hashing")
    nFoldedHist = Param.Unsigned(128, "Folded history array size")
    removeDupDist = Param.Unsigned(32, "Remove duplicate distance")

    pHistSet = Param.Unsigned(36,
        "Size of set for removing duplicated instances in filtered history")

    extraPHistStartDist = Param.Unsigned(64,
        "Starting depth of histories for tracking in the filtered history")

    extraPHistEndDist = Param.Unsigned(1024,
        "Finishing depth of histories for tracking in the filtered history")
+
diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript
index 168a99b..66fbd6e 100644
--- a/src/cpu/pred/SConscript
+++ b/src/cpu/pred/SConscript
@@ -50,8 +50,10 @@
Source('tage_sc_l.cc')
Source('tage_sc_l_8KB.cc')
Source('tage_sc_l_64KB.cc')
+Source('bfnp_32KB.cc')
DebugFlag('FreeList')
DebugFlag('Branch')
DebugFlag('Tage')
DebugFlag('LTage')
DebugFlag('TageSCL')
+DebugFlag('BFNP')
diff --git a/src/cpu/pred/bfnp_32KB.cc b/src/cpu/pred/bfnp_32KB.cc
new file mode 100644
index 0000000..5900a77
--- /dev/null
+++ b/src/cpu/pred/bfnp_32KB.cc
@@ -0,0 +1,919 @@
+/*
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Pau Cabre
+ *
+ */
+
+/*
+ * 32KB Bias-Free Neural Predictor (devised by Dibakar Gope and Mikko
Lipasti)
+ *
+ * Most of the code in this file has been adapted from
cbp32KB/predictor.cc in
+ * https://www.jilp.org/cbp2014/code/DibakarGope.tar.gz
+ */
+
+#include "cpu/pred/bfnp_32KB.hh"
+
+#include "debug/BFNP.hh"
+
BFNP_32KB::BFNP_32KB(const BFNP_32KBParams *params)
    : BPredUnit(params),
      loopPredictor(params->loop_predictor),
      ghrHeadPtr(0),
      pHistHeadPtr(0),
      filteredGhrHeadPtr(0),
      numCondBr(0),
      logBst(params->logBst),
      wtReg(params->wtReg),
      wl1(params->wl1),
      wtRs(params->wtRs),
      wl2(params->wl2),
      paShiftHash(params->paShiftHash),
      pHist(params->pHist),
      nFoldedHist(params->nFoldedHist),
      removeDupDist(params->removeDupDist),
      pHistSet(params->pHistSet),
      fHistLen(params->fHistLen),
      extraPHistStartDist(params->extraPHistStartDist),
      extraPHistEndDist(params->extraPHistEndDist),
      histLen(params->histLen),
      maxHist(params->maxHist),
      threshold(params->trainTh),
      thresholdCounter(0),
      bstMask((1 << logBst)-1)
{
    // NOTE(review): all tables below are raw new[] allocations with no
    // matching delete[] visible in this file -- confirm whether a
    // destructor exists in the header, otherwise these leak on teardown.

    // Branch Status Table: tracks whether each PC is biased taken/not-taken
    // or non-biased (state drives whether the perceptron is consulted)
    bst_table = new bst_entry[1 << logBst];

    // Perceptron weight tables: per-PC bias weights, the conventional
    // two-dimensional weight table, and the one-dimensional recency-stack
    // weight table
    weight_b = new int32_t[1<<(wtReg)];
    weight_m = new int32_t*[1<<(wtReg)];
    for (int i = 0; i < (1<<(wtReg)); ++i) {
        weight_m[i] = new int32_t[wl1];
    }
    weight_rs = new int32_t[(1<<wtRs)];

    // Unfiltered circular path/direction history plus the per-position
    // folded-history values used for index hashing
    pathAddr = new Addr[pHist];
    folded_hist = new uint32_t[nFoldedHist];
    ghr = new bool[nFoldedHist];
    outPoint = new uint32_t[nFoldedHist];

    // Per-path-entry flag: true while the branch is considered biased
    isBrStable = new bool[pHist];

    // Scratch sets used to suppress duplicate history instances
    paDistConsd = new uint32_t[pHistSet];
    paFound = new uint32_t[pHistSet];

    // Recency Stack: latest occurrence of each non-biased branch
    nonDupFilteredGHR1 = new bool[wl2];
    nonDupFilteredHA1 = new Addr[wl2];
    nonDupFilteredHADist1 = new uint32_t[wl2];

    // Filtered history of non-biased branches (direction, address, distance)
    filteredHADist = new uint32_t[fHistLen];
    filteredHA = new uint32_t[fHistLen];
    filteredGhr = new bool[fHistLen];

    // All weights start at zero (untrained)
    for (int i = 0; i < (1<<wtReg); ++i) {
        for (int j = 0; j < wl1; ++j)
        {
            weight_m[i][j] = 0;
        }
        weight_b[i] = 0;
    }

    for (int i = 0; i < (1<<wtRs); ++i) {
        weight_rs[i] = 0;
    }

    for (int i = 0; i < nFoldedHist; ++i) {
        folded_hist[i] = 0;
        ghr[i] = false;
    }
    // outPoint[i] is the bit position where history bit i wraps around
    // when folding a history of length i+1 into wtReg (first wl1 entries)
    // or wtRs (remaining entries) bits
    for (int i = 0; i < wl1; ++i) {
        outPoint[i] = (i+1) % wtReg;
    }
    for (int i = wl1; i < nFoldedHist; ++i) {
        outPoint[i] = (i+1) % wtRs;
    }

    for (int i = 0; i < pHist; ++i) {
        pathAddr[i] = 0;
        isBrStable[i] = true;
    }

    for (int i = 0; i < pHistSet; ++i) {
        paDistConsd[i] = 0;
        paFound[i] = 0;
    }

    for (int i = 0; i < wl2; ++i) {
        nonDupFilteredGHR1[i] = false;
        nonDupFilteredHA1[i] = 0;
        nonDupFilteredHADist1[i] = 0;
    }

    for (int i = 0; i < fHistLen; ++i) {
        filteredHADist[i] = 0;
        filteredHA[i] = 0;
        filteredGhr[i] = false;
    }
}
+
void
BFNP_32KB::regStats()
{
    // Register the base-class statistics first, then the two counters
    // tracking the accuracy of the perceptron when it provided the
    // final prediction (i.e. the loop predictor was not used)
    BPredUnit::regStats();

    perceptronCorrect
        .name(name() + ".perceptronCorrect")
        .desc("Number of times the perceptron predictor is the provider and "
              "the prediction is correct");

    perceptronWrong
        .name(name() + ".perceptronWrong")
        .desc("Number of times the perceptron predictor is the provider and "
              "the prediction is wrong");
}
+
+void
+BFNP_32KB::uncondBranch(ThreadID tid, Addr pc, void * &bp_history)
+{
+ // Only conditional branches are tracked
+ // But we need to create a BFNPHistory in any case
+ BFNPHistory *history = new BFNPHistory(0, true);
+ bp_history = static_cast<void*>(history);
+ DPRINTF(BFNP, "Ingnoring unconditional branch for pc 0x%lx\n", pc);
+ return;
+}
+
bool
BFNP_32KB::lookup(ThreadID tid, Addr branch_addr, void * &bp_history)
{
    // Predict the direction of the conditional branch at branch_addr.
    // Branches the Branch Status Table still considers biased are predicted
    // with their recorded direction; non-biased branches accumulate
    // perceptron weights from several history sources. Every weight-table
    // index and history direction consumed here is recorded in the
    // BFNPHistory so update() can retrain exactly the same entries.
    // The loop predictor may override the perceptron prediction.
    Addr pc = branch_addr >> instShiftAmt;
    BFNPHistory *history = new BFNPHistory(wl1+wl2);
    bp_history = static_cast<void*>(history);
    int64_t accum = 0;
    bool pred;
    int corrConsd = 0;

    uint32_t corrFoundCurCycle = 0;

    if (bst_table[pc & bstMask].state != NON_BIASED) {
        // Biased branch: predict the direction recorded in the BST
        pred = (bst_table[pc & bstMask].state == TAKEN);
    } else {
        int non_dup_consd = 0;
        int distantHist = 0;

        // Accumulate bias weight
        uint32_t bias_widx = gen_bias_widx(pc, wtReg);
        accum = accum + weight_b[bias_widx];

        // Accumulate weights from the conventional perceptron predictor
        // weight table for the wl1 (11) most recent unfiltered branches
        int histPtr = ghrHeadPtr;
        int phistPtr = pHistHeadPtr;
        for (int j=0; j <wl1; j++) {
            uint32_t widx = gen_widx(pc, pathAddr[phistPtr], wtReg);
            widx = widx ^ folded_hist[j];
            widx = widx % (1<<wtReg);
            if ( ghr[histPtr] == 1)
                accum += weight_m[widx][j];
            else
                accum -= weight_m[widx][j];

            history->idxConsd[corrFoundCurCycle] = widx;
            history->dirConsd[corrFoundCurCycle] = ghr[histPtr];
            corrFoundCurCycle++;

            histPtr++;
            if (histPtr == nFoldedHist) histPtr = 0;
            phistPtr++;
            if (phistPtr == pHist) phistPtr = 0;
        }

        // Accumulate weights from weight_rs table for Non_biased branches
        // that are at absolute distance 12 to distance 32 in the past global
        // history. This is done to boost accuracy
        histPtr = (ghrHeadPtr+wl1)%nFoldedHist;
        phistPtr = (pHistHeadPtr+wl1)%pHist;
        for (int j=wl1; j<removeDupDist; j++) {
            if ((pathAddr[phistPtr] != 0) && (! isBrStable[phistPtr])){
                uint32_t widx = gen_widx(pc, pathAddr[phistPtr], wtRs);
                uint32_t distance = (j+1) ^ ((j+1) / (1<<wtRs));
                widx = widx ^ (distance);
                widx = widx % (1<<wtRs);
                widx = widx ^ folded_hist[j];
                widx = widx % (1<<wtRs);
                if (ghr[histPtr] == 1) // If history is Taken
                    accum += weight_rs[widx];
                else // If history is Not-Taken
                    accum -= weight_rs[widx];
                paDistConsd[distantHist] = j+1;

                history->idxConsd[corrFoundCurCycle] = widx;
                history->dirConsd[corrFoundCurCycle] = ghr[histPtr];
                corrFoundCurCycle++;

                distantHist++;
            }
            histPtr++;
            if (histPtr == nFoldedHist) histPtr = 0;
            phistPtr++;
            if (phistPtr == pHist) phistPtr = 0;

        }

        // Accumulate weights from weight_rs table for Non_biased branches
        // (present in the Recency Stack) that are at absolute distance above
        // 32 and below 1024 in the past global history
        for (int j=0; ((distantHist < wl2) && (j<wl2)); j++, corrConsd++) {
            int pathPCDist = nonDupFilteredHADist1[j];
            if ((nonDupFilteredHA1[j] != 0) && (pathPCDist < 1024)){
                uint32_t widx = gen_widx(pc, nonDupFilteredHA1[j], wtRs);
                uint32_t distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
                widx = widx ^ (distance);
                widx = widx % (1<<wtRs);

                // Fold in the history at the branch's distance; distances
                // beyond the folded-history array reuse the last entry
                if ((pathPCDist-1) < nFoldedHist) {
                    widx = widx ^ folded_hist[pathPCDist-1];
                    widx = widx % (1<<wtRs);
                } else {
                    widx = widx ^ folded_hist[nFoldedHist-1];
                    widx = widx % (1<<wtRs);
                }

                if (nonDupFilteredGHR1[j] == 1) // If history is Taken
                    accum += weight_rs[widx];
                else // If history is Not-Taken
                    accum -= weight_rs[widx];

                paDistConsd[distantHist] = pathPCDist;

                history->idxConsd[corrFoundCurCycle] = widx;
                history->dirConsd[corrFoundCurCycle] = nonDupFilteredGHR1[j];
                corrFoundCurCycle++;

                distantHist++;
            } else {
                break;
            }
        }

        non_dup_consd = 0;
        int prevPathPCDist = 0;

        // On few traces, a small set of branches executes repeatedly in the
        // past history, as a result Recency Stack can not provide that many
        // branches to the prediction function since it contains only the
        // latest occurrence of a branch. The following For loop with the help
        // of {filteredHA, filteredHADist, filteredGhr} and histLen array
        // attempts to capture multiple instances of a branch in different
        // distances in the past history to fill in that room and assists the
        // prediction function
        // Accumulate weights from weight_rs table for multiple instances of
        // Non_biased branches present in different distances (at absolute
        // distance of at least 64) present in the filtered history
        int filteredHistPtr = filteredGhrHeadPtr;
        for (int j=0; ((distantHist < wl2) && (j < fHistLen)); j++) {
            int pathPCDist = filteredHADist[filteredHistPtr];
            if ((pathPCDist > extraPHistStartDist) &&
                (filteredHA[filteredHistPtr] != 0) &&
                (pathPCDist < extraPHistEndDist)) {

                // Reset the duplicate-suppression set whenever the distance
                // moves into a different histLen bin
                int prevPathPCBin = 0;
                int curPathPCBin = 0;
                for (int it1 = 0; it1 < (histLen.size()-1); it1++) {
                    if ((pathPCDist >= histLen[it1]) &&
                        (pathPCDist < histLen[it1+1])) {
                        curPathPCBin = it1+1;
                    }
                    if ((prevPathPCDist >= histLen[it1]) &&
                        (prevPathPCDist < histLen[it1+1])) {
                        prevPathPCBin = it1+1;
                    }
                }
                if (prevPathPCBin != curPathPCBin) {
                    non_dup_consd = 0;
                }
                prevPathPCDist = pathPCDist;

                uint32_t widx = 0;
                uint32_t distance;
                bool considered = false;
                if (pathPCDist > removeDupDist) {
                    for (int it2 = 0;
                         ((it2 < non_dup_consd) && (it2 < pHistSet));
                         it2++)
                    {
                        if (paFound[it2] == filteredHA[filteredHistPtr]) {
                            considered = true;
                            break;
                        }
                    }
                    if (! considered) {
                        if (non_dup_consd < pHistSet) {
                            paFound[non_dup_consd]=filteredHA[filteredHistPtr];
                            non_dup_consd++;
                        }
                    }
                }
                // Ensures that particular history instance was not there in
                // the Recency Stack in last for loop
                for (int it2 = 0; it2 < distantHist; it2++) {
                    if (paDistConsd[it2] == pathPCDist) {
                        considered = true;
                        break;
                    }
                }
                if (! considered) {
                    paDistConsd[distantHist] = pathPCDist;
                    distantHist++;
                }
                if (! considered) {
                    widx = gen_widx(pc, filteredHA[filteredHistPtr], wtRs);
                    distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
                    widx = widx ^ (distance);
                    widx = widx % (1<<wtRs);
                    if ((pathPCDist-1) < nFoldedHist) {
                        widx = widx ^ folded_hist[pathPCDist-1];
                        widx = widx % (1<<wtRs);
                    }
                    else {
                        widx = widx ^ folded_hist[nFoldedHist-1];
                        widx = widx % (1<<wtRs);
                    }
                    if (filteredGhr[filteredHistPtr] == 1) {
                        // If history is Taken
                        accum += weight_rs[widx];
                    } else {
                        // If history is Not-Taken
                        accum -= weight_rs[widx];
                    }

                    history->idxConsd[corrFoundCurCycle] = widx;

                    history->dirConsd[corrFoundCurCycle] =
                        filteredGhr[filteredHistPtr];

                    corrFoundCurCycle++;
                }
            }
            else if ((filteredHA[filteredHistPtr] == 0) ||
                     (pathPCDist >= 1024)) {
                break;
            }
            filteredHistPtr++;
            if (filteredHistPtr == fHistLen) {
                filteredHistPtr = 0;
            }
        }

        // Accumulate weights from weight_rs table for Non_biased branches
        // (present in the Recency Stack) that are at absolute distance
        // above 1024 in the past global history
        for (int j=corrConsd;((distantHist < wl2) && (j<wl2)); j++) {
            int pathPCDist = nonDupFilteredHADist1[j];
            if ((nonDupFilteredHA1[j] != 0) &&
                (pathPCDist >= 1024) &&
                (pathPCDist <= maxHist)){
                uint32_t widx = 0;
                uint32_t distance;
                widx = gen_widx(pc, nonDupFilteredHA1[j], wtRs);
                distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
                widx = widx ^ (distance);
                widx = widx % (1<<wtRs);
                if ((pathPCDist-1) < nFoldedHist) {
                    widx = widx ^ folded_hist[pathPCDist-1];
                    widx = widx % (1<<wtRs);
                }
                else {
                    widx = widx ^ folded_hist[nFoldedHist-1];
                    widx = widx % (1<<wtRs);
                }
                if (nonDupFilteredGHR1[j] == 1) // If history is Taken
                    accum += weight_rs[widx];
                else // If history is Not-Taken
                    accum -= weight_rs[widx];
                paDistConsd[distantHist] = pathPCDist;

                history->idxConsd[corrFoundCurCycle] = widx;
                history->dirConsd[corrFoundCurCycle] = nonDupFilteredGHR1[j];
                corrFoundCurCycle++;

                distantHist++;
            }
            else if ((nonDupFilteredHA1[j] == 0) || (pathPCDist > maxHist)){
                break;
            }
        }

        // The perceptron predicts taken when the weight sum is non-negative
        pred = (accum >= 0);

    }

    history->corrFoundCurCycle = corrFoundCurCycle;

    // true means training needed (the magnitude of the sum did not
    // exceed the adaptive threshold)
    history->hTrain = (accum>-threshold && accum<threshold);

    history->prcptPred = pred;

    // This is only used by the loop predictor at update time
    history->loopPredictor.predTaken = pred;

    // note that we are shifting the PC by instShiftAmt instead of passing
    // instShiftAmt as the shift argument
    // The results are slightly different, and this is needed to match the
    // original implementation
    pred = loopPredictor->loopPredict(tid, branch_addr >> instShiftAmt, true,
                                      &(history->loopPredictor), pred, 0);

    history->predDir = pred;

    DPRINTF(BFNP, "Prediction results for pc 0x%lx => pred %d, "
            "loopPredUsed: %d, prcptPred: %d, hTrain: %d, accum: %d, "
            "corrFoundCurCycle: %d\n",
            branch_addr, pred, history->loopPredictor.loopPredUsed,
            history->prcptPred, history->hTrain, accum, corrFoundCurCycle);

    return pred;
}
+
+void
+BFNP_32KB::btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history)
+{
+ // No speculation supported as of now, so do nothing
+ DPRINTF(BFNP, "Ingnoring btbUpdate for pc 0x%lx\n", branch_addr);
+}
+
void
BFNP_32KB::update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
                  bool squashed, const StaticInstPtr & inst, Addr corrTarget)
{
    // Commit-time update: train the loop predictor and the perceptron
    // tables with the resolved direction, then advance the global,
    // filtered, and recency-stack histories. The BFNPHistory allocated
    // at lookup()/uncondBranch() time is deallocated here.
    assert(bp_history);

    BFNPHistory *history = static_cast<BFNPHistory*>(bp_history);

    if (squashed) {
        // No speculation supported, so do nothing
        return;
    }

    if (history->isUncondBranch) {
        // Unconditional branches were never tracked; just free the history
        delete history;
        return;
    }

    // update stats
    if (history->loopPredictor.loopPredUsed) {
        loopPredictor->updateStats(taken, &(history->loopPredictor));
    } else if (history->prcptPred == taken) {
        perceptronCorrect++;
    } else {
        perceptronWrong++;
    }

    loopPredictor->condBranchUpdate(tid, branch_addr >> instShiftAmt,
        taken, history->prcptPred, &(history->loopPredictor), 0,
        getRandom(), getRandom(), getRandom());

    Addr pc = branch_addr >> instShiftAmt;

    bool t = taken;

    int corrConsd = 0;

    if (bst_table[pc & bstMask].state == NOT_FOUND) {
        // First sighting of this PC: record its direction as the bias
        bst_table[pc & bstMask].state = taken ? TAKEN : NOT_TAKEN;
    }
    else if (((bst_table[pc & bstMask].state == TAKEN) ||
              (bst_table[pc & bstMask].state == NOT_TAKEN)) &&
             (t != ((bst_table[pc & bstMask].state == TAKEN)))) {
        // Detect the current branch as Non-biased
        bst_table[pc & bstMask].state = NON_BIASED;

        // In the update phase, the predictor table indices are computed
        // in the same way as done in the prediction procedure before
        // (the branch was biased at lookup time, so no indices were
        // recorded in the history and they must be regenerated here)
        uint32_t bias_widx = gen_bias_widx(pc, wtReg);
        if (t == 1) {
            if (weight_b[bias_widx] < 31) weight_b[bias_widx]++;
        } else {
            if (weight_b[bias_widx] > -32) weight_b[bias_widx]--;
        }

        int non_dup_consd = 0;
        int distantHist = 0;

        // Train the conventional perceptron weights for the wl1 most
        // recent unfiltered branches (6-bit saturating weights)
        int histPtr = ghrHeadPtr;
        int phistPtr = pHistHeadPtr;
        for (int j=0; j < wl1; j++) {
            uint32_t widx = gen_widx(pc, pathAddr[phistPtr], wtReg);
            widx = widx ^ folded_hist[j];
            widx = widx % (1<<wtReg);
            if (t == ghr[histPtr]) {
                if (weight_m[widx][j]<31) weight_m[widx][j]++;
            } else if (t != ghr[histPtr]) {
                if (weight_m[widx][j]>-32) weight_m[widx][j]--;
            }

            histPtr++;
            if (histPtr == nFoldedHist) histPtr = 0;
            phistPtr++;
            if (phistPtr == pHist) phistPtr = 0;

        }

        // Train weight_rs for non-biased branches at distance wl1+1 to
        // removeDupDist (5-bit saturating weights)
        histPtr = (ghrHeadPtr+wl1)%nFoldedHist;
        phistPtr = (pHistHeadPtr+wl1)%pHist;
        for (int j=wl1;j<removeDupDist; j++) {
            if ((pathAddr[phistPtr] != 0) && (! isBrStable[phistPtr])){
                uint32_t widx = gen_widx(pc, pathAddr[phistPtr], wtRs);
                uint32_t distance = (j+1) ^ ((j+1) / (1<<wtRs));
                widx = widx ^ (distance);
                widx = widx % (1<<wtRs);
                widx = widx ^ folded_hist[j];
                widx = widx % (1<<wtRs);
                if (t == ghr[histPtr]) {
                    if (weight_rs[widx]<15) weight_rs[widx]++;
                } else if (t != ghr[histPtr]) {
                    if (weight_rs[widx]>-16) weight_rs[widx]--;
                }
                paDistConsd[distantHist] = j+1;
                distantHist++;
            }
            histPtr++;
            if (histPtr == nFoldedHist) histPtr = 0;
            phistPtr++;
            if (phistPtr == pHist) phistPtr = 0;

        }

        // Train weight_rs for Recency Stack branches below distance 1024
        for (int j=0;((distantHist < wl2) && (j<wl2)); j++, corrConsd++) {
            int pathPCDist = nonDupFilteredHADist1[j];
            if ((nonDupFilteredHA1[j] != 0) && (pathPCDist < 1024)) {
                uint32_t widx = gen_widx(pc, nonDupFilteredHA1[j], wtRs);
                uint32_t distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
                widx = widx ^ (distance);
                widx = widx % (1<<wtRs);
                if ((pathPCDist-1) < nFoldedHist) {
                    widx = widx ^ folded_hist[pathPCDist-1];
                    widx = widx % (1<<wtRs);
                } else {
                    widx = widx ^ folded_hist[nFoldedHist-1];
                    widx = widx % (1<<wtRs);
                }
                if (t == nonDupFilteredGHR1[j]) {
                    if (weight_rs[widx]<15) weight_rs[widx]++;
                } else if (t != nonDupFilteredGHR1[j]) {
                    if (weight_rs[widx]>-16) weight_rs[widx]--;
                }
                paDistConsd[distantHist] = pathPCDist;
                distantHist++;
            } else {
                break;
            }
        }

        non_dup_consd = 0;
        int prevPathPCDist = 0;
        int filteredHistPtr = filteredGhrHeadPtr;

        // Train weight_rs for multiple instances of non-biased branches
        // found in the filtered history (mirrors the same walk performed
        // in lookup())
        for (int j=0; ((distantHist < wl2) && (j < fHistLen)); j++) {
            int pathPCDist = filteredHADist[filteredHistPtr];
            if ((pathPCDist > extraPHistStartDist) &&
                (filteredHA[filteredHistPtr] != 0) &&
                (pathPCDist < extraPHistEndDist)){

                int prevPathPCBin = 0;
                int curPathPCBin = 0;
                for (int it1 = 0; it1 < (histLen.size()-1); it1++) {
                    if ((pathPCDist >= histLen[it1]) &&
                        (pathPCDist < histLen[it1+1])) {
                        curPathPCBin = it1+1;
                    }
                    if ((prevPathPCDist >= histLen[it1]) &&
                        (prevPathPCDist < histLen[it1+1])) {
                        prevPathPCBin = it1+1;
                    }
                }
                if (prevPathPCBin != curPathPCBin) {
                    non_dup_consd = 0;
                }
                prevPathPCDist = pathPCDist;

                uint32_t widx = 0;
                uint32_t distance;
                bool considered = false;
                if (pathPCDist > removeDupDist) {
                    for (int it2 = 0;
                         ((it2 < non_dup_consd) && (it2 < pHistSet));
                         it2++)
                    {
                        if (paFound[it2] == filteredHA[filteredHistPtr]) {
                            considered = true;
                            break;
                        }
                    }
                    if (! considered) {
                        if (non_dup_consd < pHistSet) {
                            paFound[non_dup_consd]=filteredHA[filteredHistPtr];
                            non_dup_consd++;
                        }
                    }
                }

                // Ensures that particular history instance was not there
                // in the Recency Stack in the last for loop
                for (int it2 = 0; it2 <distantHist; it2++) {
                    if (paDistConsd[it2] == pathPCDist) {
                        considered = true;
                        break;
                    }
                }
                if (! considered) {
                    paDistConsd[distantHist] = pathPCDist;
                    distantHist++;
                }
                if (! considered) {
                    widx = gen_widx(pc, filteredHA[filteredHistPtr], wtRs);
                    distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
                    widx = widx ^ (distance);
                    widx = widx % (1<<wtRs);
                    if ((pathPCDist-1) < nFoldedHist) {
                        widx = widx ^ folded_hist[pathPCDist-1];
                        widx = widx % (1<<wtRs);
                    } else {
                        widx = widx ^ folded_hist[nFoldedHist-1];
                        widx = widx % (1<<wtRs);
                    }
                    if (t == filteredGhr[filteredHistPtr]) {
                        if (weight_rs[widx]<15) weight_rs[widx]++;
                    } else if (t != filteredGhr[filteredHistPtr]) {
                        if (weight_rs[widx]>-16) weight_rs[widx]--;
                    }
                }
            } else if ((filteredHA[filteredHistPtr] == 0) ||
                       (pathPCDist >= 1024)) {
                break;
            }
            filteredHistPtr++;
            if (filteredHistPtr == fHistLen) filteredHistPtr = 0;

        }

        // Train weight_rs for Recency Stack branches at distance 1024 up
        // to maxHist
        for (int j=corrConsd;((distantHist < wl2) && (j<wl2)); j++) {
            int pathPCDist = nonDupFilteredHADist1[j];
            if ((nonDupFilteredHA1[j] != 0) && (pathPCDist >= 1024) &&
                (pathPCDist <= maxHist)) {
                uint32_t widx = gen_widx(pc, nonDupFilteredHA1[j], wtRs);
                uint32_t distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
                widx = widx ^ (distance);
                widx = widx % (1<<wtRs);
                if ((pathPCDist-1) < nFoldedHist) {
                    widx = widx ^ folded_hist[pathPCDist-1];
                    widx = widx % (1<<wtRs);
                } else {
                    widx = widx ^ folded_hist[nFoldedHist-1];
                    widx = widx % (1<<wtRs);
                }
                if (t == nonDupFilteredGHR1[j]) {
                    if (weight_rs[widx]<15) weight_rs[widx]++;
                } else if (t != nonDupFilteredGHR1[j]) {
                    if (weight_rs[widx]>-16) weight_rs[widx]--;
                }
                paDistConsd[distantHist] = pathPCDist;
                distantHist++;
            } else if ((nonDupFilteredHA1[j] == 0) || (pathPCDist > maxHist)) {
                break;
            }
        }
    } else if (((bst_table[pc & bstMask].state == TAKEN) ||
               (bst_table[pc & bstMask].state == NOT_TAKEN)) &&
               (t == ((bst_table[pc & bstMask].state == TAKEN)))) {
        // Still biased and predicted correctly: nothing to train
    } else if ( (t != history->predDir) || history->hTrain) {
        // Training needed if threshold not exceeded or predict wrong.
        // Reuse the indices/directions recorded at lookup() time.
        uint32_t bias_widx = gen_bias_widx(pc, wtReg);
        if (t == 1) {
            if (weight_b[bias_widx] < 31) weight_b[bias_widx]++;
        } else {
            if (weight_b[bias_widx] > -32) weight_b[bias_widx]--;
        }

        for (int j = 0; j < wl1; j++) {
            if (t == history->dirConsd[j]) {
                if (weight_m[history->idxConsd[j]][j]<31) {
                    weight_m[history->idxConsd[j]][j]++;
                }
            } else if (t != history->dirConsd[j]) {
                if (weight_m[history->idxConsd[j]][j]>-32) {
                    weight_m[history->idxConsd[j]][j]--;
                }
            }
        }
        for (int j = wl1; j < history->corrFoundCurCycle; j++) {
            if (t == history->dirConsd[j]) {
                if (weight_rs[history->idxConsd[j]]<15) {
                    weight_rs[history->idxConsd[j]]++;
                }
            } else if (t != history->dirConsd[j]) {
                if (weight_rs[history->idxConsd[j]]>-16) {
                    weight_rs[history->idxConsd[j]]--;
                }
            }
        }
    }

    // threshold adjusting: grow the training threshold on mispredictions,
    // shrink it on low-confidence correct predictions
    if (bst_table[pc & bstMask].state == NON_BIASED) {
        if (t != history->predDir) {
            thresholdCounter++;
            if (thresholdCounter==63) {
                thresholdCounter = 0;
                threshold++;
            }
        } else if ((t==history->predDir) && history->hTrain) {
            thresholdCounter--;
            if (thresholdCounter==-63) {
                thresholdCounter = 0;
                threshold--;
            }
        }
    }

    // Compute folded history in each bit position up to nFoldedHist bits
    // based upon the folded history already computed before in the last cycle
    int histPtr = ghrHeadPtr;
    for (int hiter = 0; hiter < wl1; hiter++) {
        folded_hist[hiter] = (folded_hist[hiter] << 1) | (taken?1:0);
        folded_hist[hiter] ^= ((ghr[histPtr]?1:0) << outPoint[hiter]);
        folded_hist[hiter] ^= (folded_hist[hiter] >> wtReg);
        folded_hist[hiter] &= (1 << wtReg) - 1;

        histPtr++;
        if (histPtr == nFoldedHist) histPtr = 0;

    }
    histPtr = (ghrHeadPtr+wl1)%nFoldedHist;
    for (int hiter = wl1; hiter < nFoldedHist; hiter++) {
        folded_hist[hiter] = (folded_hist[hiter] << 1) | (taken?1:0);
        folded_hist[hiter] ^= ((ghr[histPtr]?1:0) << outPoint[hiter]);
        folded_hist[hiter] ^= (folded_hist[hiter] >> wtRs);
        folded_hist[hiter] &= (1 << wtRs) - 1;

        histPtr++;
        if (histPtr == nFoldedHist) histPtr = 0;

    }

    // Record whether the current branch is still a "completely biased or
    // stable" branch
    bool isStableBranch = (bst_table[pc & bstMask].state != NON_BIASED);

    // Update unfiltered GHR and path history
    ghrHeadPtr--;
    if (ghrHeadPtr == -1) ghrHeadPtr = nFoldedHist-1;
    ghr[ghrHeadPtr] = taken;
    pHistHeadPtr--;
    if (pHistHeadPtr == -1) pHistHeadPtr = pHist-1;
    pathAddr[pHistHeadPtr] = pc;
    isBrStable[pHistHeadPtr] = isStableBranch;

    // Update filtered history used to boost accuracy and provide multiple
    // instances of a branch to the prediction function if required
    for (int j=0; j< fHistLen; j++) {
        if (filteredHA[j] != 0) filteredHADist[j]++;
    }

    if (numCondBr >= extraPHistStartDist) {
        if (! isBrStable[(pHistHeadPtr+extraPHistStartDist)%pHist]) {
            filteredGhrHeadPtr--;
            if (filteredGhrHeadPtr == -1) filteredGhrHeadPtr = fHistLen-1;

            filteredGhr[filteredGhrHeadPtr] =
                ghr[(ghrHeadPtr+extraPHistStartDist)%nFoldedHist];

            filteredHA[filteredGhrHeadPtr] =
                pathAddr[(pHistHeadPtr+extraPHistStartDist)%pHist];

            filteredHADist[filteredGhrHeadPtr] = extraPHistStartDist+1;
        }
    }

    // Update the absolute distance of the latest occurrence of the branches
    // present in Recency Stack (RS)
    for (int j=0; j<wl2; j++) {
        if (nonDupFilteredHA1[j] != 0) nonDupFilteredHADist1[j]++;
    }
    // If the current branch is Non-biased, insert that into the
    // Recency Stack (RS)
    if (numCondBr >= removeDupDist) {
        if (! isBrStable[(pHistHeadPtr+removeDupDist)%pHist]) {
            int j = 0;
            for (j=0; j<wl2; j++) {
                const uint32_t idx = (pHistHeadPtr+removeDupDist)%pHist;
                if (pathAddr[idx] == nonDupFilteredHA1[j]) {
                    //Find if a prior occurrence is already present in RS
                    j++;
                    break;
                }
            }
            // Shift older entries down and place the new occurrence on top
            for (int k = j-1; k>0; k--) {
                nonDupFilteredGHR1[k] = nonDupFilteredGHR1[k-1];
                nonDupFilteredHA1[k]=nonDupFilteredHA1[k-1];
                nonDupFilteredHADist1[k]=nonDupFilteredHADist1[k-1];
            }
            nonDupFilteredGHR1[0]=ghr[(ghrHeadPtr+removeDupDist)%nFoldedHist];
            nonDupFilteredHA1[0]=pathAddr[(pHistHeadPtr+removeDupDist)%pHist];
            nonDupFilteredHADist1[0]=removeDupDist + 1;
        }
    }

    numCondBr++;

    delete history;
}
+
+void
+BFNP_32KB::squash(ThreadID tid, void *bp_history)
+{
+ BFNPHistory * history = static_cast<BFNPHistory*>(bp_history);
+ // No speculation supported, so nothing needs to be restored
+ delete history;
+}
+
+/*----- Hash function that calculating index of bias table -----*/
+uint32_t
+BFNP_32KB::gen_bias_widx(Addr cur_pc, uint32_t wt_size)
+{
+ cur_pc = (cur_pc ) ^ (cur_pc / (1<<wt_size));
+ uint32_t widx = cur_pc;
+ widx = widx % (1<<wt_size);
+ return widx;
+}
+
+// up-down saturating counter
+void
+BFNP_32KB::ctrupdate(int32_t & ctr, bool taken, int nbits)
+{
+ if (taken) {
+ if (ctr < ((1 << (nbits - 1)) - 1))
+ ctr++;
+ } else {
+ if (ctr > -(1 << (nbits - 1)))
+ ctr--;
+ }
+}
+
+// Hash function that calculating index of weight tables
+uint32_t
+BFNP_32KB::gen_widx(Addr cur_pc, Addr path_pc, uint32_t wt_size) const
+{
+ cur_pc = (cur_pc ) ^ (cur_pc / (1<<wt_size));
+ path_pc = path_pc >> paShiftHash;
+ path_pc = (path_pc) ^ (path_pc / (1<<wt_size));
+ uint32_t widx = cur_pc ^ (path_pc);
+ widx = widx % (1<<wt_size);
+ return widx;
+}
+
// Factory method invoked by the gem5 params framework to instantiate
// the predictor from its Python-configured parameters
BFNP_32KB*
BFNP_32KBParams::create()
{
    return new BFNP_32KB(this);
}
+
+bool
+BFNP_32KB_LoopPredictor::optionalAgeInc(int nrand) const
+{
+ return (nrand & 7) == 0;
+}
+
+int
+BFNP_32KB_LoopPredictor::lindex(Addr pc_in, unsigned instShiftAmt) const
+{
+ return (((pc_in >> instShiftAmt) & loopSetMask) << logLoopTableAssoc);
+}
+
// Factory method invoked by the gem5 params framework to instantiate
// the derived loop predictor from its Python-configured parameters
BFNP_32KB_LoopPredictor*
BFNP_32KB_LoopPredictorParams::create()
{
    return new BFNP_32KB_LoopPredictor(this);
}
+
diff --git a/src/cpu/pred/bfnp_32KB.hh b/src/cpu/pred/bfnp_32KB.hh
new file mode 100644
index 0000000..1af68d0
--- /dev/null
+++ b/src/cpu/pred/bfnp_32KB.hh
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Pau Cabre
+ *
+ */
+
+/*
+ * 32KB Bias-Free Neural Predictor (devised by Dibakar Gope and Mikko
+ * Lipasti)
+ *
+ * Most of the code in this file has been adapted from cbp32KB/predictor.h
+ * in https://www.jilp.org/cbp2014/code/DibakarGope.tar.gz
+ */
+
+#ifndef __CPU_PRED_BFNP_32KB_HH
+#define __CPU_PRED_BFNP_32KB_HH
+
+#include <vector>
+
+#include "base/types.hh"
+#include "cpu/pred/bpred_unit.hh"
+#include "cpu/pred/loop_predictor.hh"
+#include "params/BFNP_32KB.hh"
+#include "params/BFNP_32KB_LoopPredictor.hh"
+
/**
 * Loop predictor variant used by the BFNP_32KB branch predictor. It only
 * customizes the entry-aging policy and the table index computation of the
 * base LoopPredictor; all other behavior comes from the base class and the
 * parameters set in BranchPredictor.py.
 */
class BFNP_32KB_LoopPredictor : public LoopPredictor {
  public:
    BFNP_32KB_LoopPredictor(BFNP_32KB_LoopPredictorParams *p)
        : LoopPredictor(p)
    {}

    /** Age an entry only when (nrand & 7) == 0, i.e. 1 time out of 8. */
    bool optionalAgeInc(int nrand) const override;

    /** Index of the first way of the set selected by the shifted PC. */
    int lindex(Addr pc_in, unsigned instShiftAmt) const override;
};
+
+class BFNP_32KB : public BPredUnit
+{
+ public:
+ /**
+ * Default branch predictor constructor.
+ */
+ BFNP_32KB(const BFNP_32KBParams *params);
+
+ void uncondBranch(ThreadID tid, Addr pc, void * &bp_history) override;
+
+ /**
+ * Looks up the given address in the branch predictor and returns
+ * a true/false value as to whether it is taken.
+ * @param branch_addr The address of the branch to look up.
+ * @param bp_history Pointer to any bp history state.
+ * @return Whether or not the branch is taken.
+ */
+ bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history)
override;
+
+ /**
+ * Updates the branch predictor to Not Taken if a BTB entry is
+ * invalid or not found.
+ * @param branch_addr The address of the branch to look up.
+ * @param bp_history Pointer to any bp history state.
+ * @return Whether or not the branch is taken.
+ */
+ void btbUpdate(ThreadID tid, Addr branch_addr,
+ void * &bp_history) override;
+
+ /**
+ * Updates the branch predictor with the actual result of a branch.
+ * @param branch_addr The address of the branch to update.
+ * @param taken Whether or not the branch was taken.
+ */
+ void update(ThreadID tid, Addr branch_addr, bool taken,
+ void *bp_history, bool squashed, const StaticInstPtr &
inst,
+ Addr corrTarget) override;
+
+ void squash(ThreadID tid, void *bp_history) override;
+
+ void regStats() override;
+
+ private:
+
+ /** The loop predictor object */
+ LoopPredictor *loopPredictor;
+
+ struct BFNPHistory
+ {
+ LoopPredictor::BranchInfo loopPredictor;
+
+ std::vector<uint32_t> idxConsd;
+ std::vector<bool> dirConsd;
+
+ uint32_t corrFoundCurCycle;
+
+ bool hTrain;
+ bool prcptPred;
+ bool predDir;
+ bool isUncondBranch;
+
+ BFNPHistory(uint32_t size, bool isUncondBranch = false)
+ : loopPredictor(),
+ isUncondBranch(isUncondBranch)
+ {
+ corrFoundCurCycle = 0;
+ idxConsd.resize(size);
+ dirConsd.resize(size);
+ }
+ };
+
+ enum BST_STATE {
+ NOT_FOUND,
+ TAKEN,
+ NOT_TAKEN,
+ NON_BIASED
+ };
+
+ struct bst_entry
+ {
+ BST_STATE state;
+ bst_entry()
+ : state(NOT_FOUND)
+ {
+ }
+ };
+
+ bst_entry * bst_table; // branch status table
+
+ /* --- weight tables --- */
+ int32_t *weight_b; //1-dimensional bias weight table
+ int32_t **weight_m; //2-dimensional conventional perceptron predictor
+ // weight table
+ int32_t *weight_rs; //1-dimensional weight table as proposed in the
writeup
+
+ Addr *pathAddr; // Path address register containing the address of past
+ // branches without any filtering
+ // It was 32-bits in the original implementation, and
+ // it has been extended to 64-bits here
+
+ uint32_t *folded_hist; // folded history array; n-th bit in this array
+ // computes folded history for the history bits
from
+ // the n-th positon in the global unfiltered
history
+ // to the current branch; Please Note that, the
+ // space occupied for this small folded_history
+ // array is NOT included in the storage budget,
as
+ // this can be easily computed from the GHR
register
+ // during the prediction computation for each
and
+ // every bit position; Storing the folded
history in
+ // the folded_hist array simply speeds up the
+ // simulation.
+
+ bool *ghr; // Unfiltered history shift register used for
+ // prediction and updates
+
+ bool *isBrStable; // A shift register/circular buffer indicating
if a
+ // branch in PA was stable the time it was
inserted
+ // into that
+
+ uint32_t *paDistConsd; // Set containing the absolute distances of
+ // branches, used later in the code to remove
+ // duplicate instances of a branch in the
prediction
+ // computation
+
+ uint32_t *paFound; // Set containing some path addresses used
later in
+ // the code to remove duplicate instances of a
+ // branch in the prediction computation
+
+ // Folloing three variables are the three fields in Recency Stack (RS)
+ // defined in the paper
+ bool *nonDupFilteredGHR1; // used for RS[].H to contain the
latest
+ // outcome of a branch
+ Addr *nonDupFilteredHA1; // used for RS[].A to contain branch
+ // address/tag (hashed down to
required
+ // number of bits)
+ uint32_t *nonDupFilteredHADist1; // used for RS[].P to contain absolute
+ // distance of the latest occurrence
of a
+ // branch in global history
+
+ bool *filteredGhr; // outcome of the branch in filtered
history
+ uint32_t *filteredHADist; // absolute distance/depth of the branch
in the
+ // global history
+ uint32_t *filteredHA; // address of the branch in filtered
history
+
+ uint32_t *outPoint;
+
+ int32_t ghrHeadPtr; // pointer to the start of the circular
+ // buffer GHR
+ int32_t pHistHeadPtr; // pointer to the start of the circular
buffer
+ // PA and isBrStable
+ int32_t filteredGhrHeadPtr; // pointer to the start of the circular
buffer
+ // filteredGHR, filteredGHRHA and
+ // filteredGHRDist
+
+ uint32_t numCondBr; //counter to count number of retired cond. branches
+
+ const uint32_t logBst;
+ const uint32_t wtReg;
+ const uint32_t wl1;
+ const uint32_t wtRs;
+ const uint32_t wl2;
+ const uint32_t paShiftHash;
+ const uint32_t pHist;
+ const uint32_t nFoldedHist;
+ const uint32_t removeDupDist;
+
+ // size of a set used to remove duplicate instances in filtered
+ // histories {filteredGhr, filteredHA, filteredHADist}
+ const uint32_t pHistSet;
+
+ // tracks filtered histories that appeared at least at depth of
+ // extraPHistStartDist in the global history;used for boosting the
accuracy
+ const uint32_t fHistLen;
+
+ const uint32_t extraPHistStartDist;
+ const uint32_t extraPHistEndDist;
+
+ // histLen array is used in conjuntion with filtered history as defined
+ // below in the code and is used to boost accuracy and provide multiple
+ // instances of a branch present in the past global history to the
+ // prediction function if required
+ const std::vector<uint32_t> histLen;
+
+ const uint32_t maxHist;
+
+ int32_t threshold; // dynamic threshold value as in O-GEHL
+ int32_t thresholdCounter; // threshold counter as in O-GEHL
+
+ const uint32_t bstMask;
+
+ // Hash function that calculating index of bias table
+ static uint32_t gen_bias_widx(Addr cur_pc, uint32_t wt_size);
+
+ // up-down saturating counter
+ static void ctrupdate(int32_t & ctr, bool taken, int nbits);
+
+ // Hash function that calculating index of weight tables
+ uint32_t gen_widx(Addr cur_pc, Addr path_pc, uint32_t wt_size) const;
+
+ // stats
+ Stats::Scalar perceptronCorrect;
+ Stats::Scalar perceptronWrong;
+};
+
+#endif // __CPU_PRED_BFNP_32KB_HH
diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc
index cdfa9c8..0baa597 100644
--- a/src/cpu/pred/bpred_unit.cc
+++ b/src/cpu/pred/bpred_unit.cc
@@ -49,6 +49,7 @@
#include "arch/isa_traits.hh"
#include "arch/types.hh"
#include "arch/utility.hh"
+#include "base/random.hh"
#include "base/trace.hh"
#include "config/the_isa.hh"
#include "debug/Branch.hh"
@@ -492,6 +493,12 @@
}
}
// Default random number source for branch predictors, backed by the
// global random_mt generator. Hoisted from TAGEBase (see the tage_base.cc
// hunk in this change) so that any BPredUnit subclass can use or
// specialize it.
int
BPredUnit::getRandom() const
{
    return random_mt.random<int>();
}
+
void
BPredUnit::dump()
{
diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh
index 7f68cf5..d0b7512 100644
--- a/src/cpu/pred/bpred_unit.hh
+++ b/src/cpu/pred/bpred_unit.hh
@@ -322,6 +322,13 @@
const unsigned instShiftAmt;
/**
+ * Algorithm for returning a random number
+ * This base class just uses random_mt, but some derived classes
+ * may want to use a more realistic implementation or force some values
+ */
+ int getRandom() const;
+
+ /**
* @{
* @name PMU Probe points.
*/
diff --git a/src/cpu/pred/loop_predictor.hh b/src/cpu/pred/loop_predictor.hh
index d55591b..59717a8 100644
--- a/src/cpu/pred/loop_predictor.hh
+++ b/src/cpu/pred/loop_predictor.hh
@@ -147,7 +147,7 @@
* @param pc_in The unshifted branch PC.
* @param instShiftAmt Shift the pc by as many bits
*/
- int lindex(Addr pc_in, unsigned instShiftAmt) const;
+ virtual int lindex(Addr pc_in, unsigned instShiftAmt) const;
/**
* Computes the index used to access the
diff --git a/src/cpu/pred/tage_base.cc b/src/cpu/pred/tage_base.cc
index 7fba790..499ff8b 100644
--- a/src/cpu/pred/tage_base.cc
+++ b/src/cpu/pred/tage_base.cc
@@ -42,7 +42,6 @@
#include "base/intmath.hh"
#include "base/logging.hh"
-#include "base/random.hh"
#include "base/trace.hh"
#include "debug/Fetch.hh"
#include "debug/Tage.hh"
@@ -630,12 +629,6 @@
return;
}
-int
-TAGEBase::getRandom() const
-{
- return random_mt.random<int>();
-}
-
void
TAGEBase::updateStats(bool taken, TageBranchInfo* bi)
{
diff --git a/src/cpu/pred/tage_base.hh b/src/cpu/pred/tage_base.hh
index ebd5053..9cb803e 100644
--- a/src/cpu/pred/tage_base.hh
+++ b/src/cpu/pred/tage_base.hh
@@ -425,13 +425,6 @@
*/
virtual void extraAltCalc(TageBranchInfo* bi);
- /**
- * Algorithm for returning a random number
- * This base TAGE class just uses random_mt, but some derived classes
- * may want to use a more realistic implementation or force some values
- */
- int getRandom() const;
-
const unsigned logRatioBiModalHystEntries;
const unsigned nHistoryTables;
const unsigned tagTableCounterBits;
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/15956
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: Ia159c5e7430a387aa46f33b0ed1503bc515d9c73
Gerrit-Change-Number: 15956
Gerrit-PatchSet: 1
Gerrit-Owner: Pau Cabre <pau.ca...@metempsy.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev