Pau Cabre has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/15956

Change subject: cpu: Added Bias-Free Neural branch Predictor (32KB)
......................................................................

cpu: Added Bias-Free Neural branch Predictor (32KB)

This is a branch predictor devised by Dibakar Gope and Mikko Lipasti

The original paper of the branch predictor can be found here:
https://www.jilp.org/cbp2014/paper/DibakarGope.pdf

And this is the original implementation:
https://www.jilp.org/cbp2014/code/DibakarGope.tar.gz

Change-Id: Ia159c5e7430a387aa46f33b0ed1503bc515d9c73
Signed-off-by: Pau Cabre <pau.ca...@metempsy.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/SConscript
A src/cpu/pred/bfnp_32KB.cc
A src/cpu/pred/bfnp_32KB.hh
M src/cpu/pred/bpred_unit.cc
M src/cpu/pred/bpred_unit.hh
M src/cpu/pred/loop_predictor.hh
M src/cpu/pred/tage_base.cc
M src/cpu/pred/tage_base.hh
9 files changed, 1,269 insertions(+), 15 deletions(-)



diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py
index b9273b3..4088625 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -424,3 +424,72 @@

     logInb = 7

+# Loop predictor for BFNP_32KB
+class BFNP_32KB_LoopPredictor(LoopPredictor):
+    type = 'BFNP_32KB_LoopPredictor'
+    cxx_class  = 'BFNP_32KB_LoopPredictor'
+    cxx_header = "cpu/pred/bfnp_32KB.hh"
+
+    logSizeLoopPred = 6
+    useDirectionBit = True
+    restrictAllocation = True
+    initialLoopAge = 7
+    optionalAgeReset = False
+    initialLoopIter = 0
+    loopTableAgeBits = 3
+    loopTableConfidenceBits = 3
+    loopTableIterBits = 10
+    loopTableTagBits = 10
+    withLoopBits = 7
+
+    # This loop predictor uses hashing, but not in all the cases
+    # This is one of the reasons why we need a BFNP_32KB_LoopPredictor
+    # derived class (otherwise, the base LoopPredictor class could be enough)
+    useHashing = True
+
+
+# 32KB Bias-Free Neural branch predictor as described in
+# https://www.jilp.org/cbp2014/paper/DibakarGope.pdf
+class BFNP_32KB(BranchPredictor):
+    type = 'BFNP_32KB'
+    cxx_class = 'BFNP_32KB'
+    cxx_header = "cpu/pred/bfnp_32KB.hh"
+
+    logBst = Param.Unsigned(13, "Log2 number of Branch Status Table entries")
+    fHistLen = Param.Unsigned(128, "Length of the filtered history")
+    pHist = Param.Unsigned(65, "Length of the unfiltered path history")
+    trainTh = Param.Int(82, "Train threshold")
+
+    histLen = VectorParam.Unsigned(
+        [64, 80, 96, 112, 128, 160, 192, 256, 320, 416, 512, 1024],
+        "History lengths")
+
+    maxHist = Param.Unsigned(2048,
+        "maximum history length the predictor attempts to correalte with")
+
+    loop_predictor = Param.LoopPredictor(
+        BFNP_32KB_LoopPredictor(), "Loop predictor")
+
+    wtReg = Param.Unsigned(10, "Log2 number of bias weights")
+
+    wl1 = Param.Unsigned(11,
+        "Second dimension size of conventional perceptron weight table")
+
+    wl2 = Param.Unsigned(36, "Number of recency-stack entries")
+
+    wtRs = Param.Unsigned(15,
+        "Log2 numer of one-dimensional weight table entries")
+
+    paShiftHash = Param.Unsigned(2, "Number of bits to shift for hashing")
+    nFoldedHist = Param.Unsigned(128, "Folded history array size")
+    removeDupDist = Param.Unsigned(32, "Remove duplicate distance")
+
+    pHistSet = Param.Unsigned(36,
+ "Size of set for removing duplicated instances in filtered history")
+
+    extraPHistStartDist = Param.Unsigned(64,
+        "Starting depth of histories for tracking in the filtered history")
+
+    extraPHistEndDist = Param.Unsigned(1024,
+ "Finishing depth of histories for tracking in the filtered history")
+
diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript
index 168a99b..66fbd6e 100644
--- a/src/cpu/pred/SConscript
+++ b/src/cpu/pred/SConscript
@@ -50,8 +50,10 @@
 Source('tage_sc_l.cc')
 Source('tage_sc_l_8KB.cc')
 Source('tage_sc_l_64KB.cc')
+Source('bfnp_32KB.cc')
 DebugFlag('FreeList')
 DebugFlag('Branch')
 DebugFlag('Tage')
 DebugFlag('LTage')
 DebugFlag('TageSCL')
+DebugFlag('BFNP')
diff --git a/src/cpu/pred/bfnp_32KB.cc b/src/cpu/pred/bfnp_32KB.cc
new file mode 100644
index 0000000..5900a77
--- /dev/null
+++ b/src/cpu/pred/bfnp_32KB.cc
@@ -0,0 +1,919 @@
+/*
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Pau Cabre
+ *
+ */
+
+/*
+ * 32KB Bias-Free Neural Predictor (devised by Dibakar Gope and Mikko Lipasti)
+ *
+ * Most of the code in this file has been adapted from cbp32KB/predictor.cc in
+ * https://www.jilp.org/cbp2014/code/DibakarGope.tar.gz
+ */
+
+#include "cpu/pred/bfnp_32KB.hh"
+
+#include "debug/BFNP.hh"
+
+BFNP_32KB::BFNP_32KB(const BFNP_32KBParams *params)
+  : BPredUnit(params),
+    loopPredictor(params->loop_predictor),
+    ghrHeadPtr(0),
+    pHistHeadPtr(0),
+    filteredGhrHeadPtr(0),
+    numCondBr(0),
+    logBst(params->logBst),
+    wtReg(params->wtReg),
+    wl1(params->wl1),
+    wtRs(params->wtRs),
+    wl2(params->wl2),
+    paShiftHash(params->paShiftHash),
+    pHist(params->pHist),
+    nFoldedHist(params->nFoldedHist),
+    removeDupDist(params->removeDupDist),
+    pHistSet(params->pHistSet),
+    fHistLen(params->fHistLen),
+    extraPHistStartDist(params->extraPHistStartDist),
+    extraPHistEndDist(params->extraPHistEndDist),
+    histLen(params->histLen),
+    maxHist(params->maxHist),
+    threshold(params->trainTh),
+    thresholdCounter(0),
+    bstMask((1 << logBst)-1)
+{
+    bst_table = new bst_entry[1 << logBst];
+
+    weight_b = new int32_t[1<<(wtReg)];
+    weight_m = new int32_t*[1<<(wtReg)];
+    for (int i = 0; i < (1<<(wtReg)); ++i) {
+       weight_m[i] = new int32_t[wl1];
+    }
+    weight_rs = new int32_t[(1<<wtRs)];
+    pathAddr = new Addr[pHist];
+    folded_hist = new uint32_t[nFoldedHist];
+    ghr = new bool[nFoldedHist];
+    outPoint = new uint32_t[nFoldedHist];
+
+    isBrStable = new bool[pHist];
+
+    paDistConsd = new  uint32_t[pHistSet];
+    paFound = new uint32_t[pHistSet];
+
+    nonDupFilteredGHR1 = new bool[wl2];
+    nonDupFilteredHA1 = new Addr[wl2];
+    nonDupFilteredHADist1 = new uint32_t[wl2];
+
+    filteredHADist = new uint32_t[fHistLen];
+    filteredHA = new uint32_t[fHistLen];
+    filteredGhr = new bool[fHistLen];
+
+    for (int i = 0; i < (1<<wtReg); ++i) {
+        for (int j = 0; j < wl1; ++j)
+        {
+           weight_m[i][j] = 0;
+        }
+        weight_b[i] = 0;
+    }
+
+    for (int i = 0; i < (1<<wtRs); ++i) {
+        weight_rs[i] = 0;
+    }
+
+    for (int i = 0; i < nFoldedHist; ++i) {
+        folded_hist[i] = 0;
+        ghr[i] = false;
+    }
+    for (int i = 0; i < wl1; ++i) {
+        outPoint[i] = (i+1) % wtReg;
+    }
+    for (int i = wl1; i < nFoldedHist; ++i) {
+        outPoint[i] = (i+1) % wtRs;
+    }
+
+    for (int i = 0; i < pHist; ++i) {
+        pathAddr[i] = 0;
+        isBrStable[i] = true;
+    }
+
+    for (int i = 0; i < pHistSet; ++i) {
+        paDistConsd[i] = 0;
+        paFound[i] = 0;
+    }
+
+    for (int i = 0; i < wl2; ++i) {
+        nonDupFilteredGHR1[i] = false;
+        nonDupFilteredHA1[i] = 0;
+        nonDupFilteredHADist1[i] = 0;
+    }
+
+    for (int i = 0; i < fHistLen; ++i) {
+        filteredHADist[i] = 0;
+        filteredHA[i] = 0;
+        filteredGhr[i] = false;
+    }
+}
+
+void
+BFNP_32KB::regStats()
+{
+    BPredUnit::regStats();
+
+    perceptronCorrect
+        .name(name() + ".perceptronCorrect")
+        .desc("Number of times the perceptron predictor is the provider and "
+              "the prediction is correct");
+
+    perceptronWrong
+        .name(name() + ".perceptronWrong")
+        .desc("Number of times the perceptron predictor is the provider and "
+              "the prediction is wrong");
+}
+
+void
+BFNP_32KB::uncondBranch(ThreadID tid, Addr pc, void * &bp_history)
+{
+    // Only conditional branches are tracked
+    // But we need to create a BFNPHistory in any case
+    BFNPHistory *history = new BFNPHistory(0, true);
+    bp_history = static_cast<void*>(history);
+    DPRINTF(BFNP, "Ingnoring unconditional branch for pc 0x%lx\n", pc);
+    return;
+}
+
+bool
+BFNP_32KB::lookup(ThreadID tid, Addr branch_addr, void * &bp_history)
+{
+    Addr pc = branch_addr >> instShiftAmt;
+    BFNPHistory *history = new BFNPHistory(wl1+wl2);
+    bp_history = static_cast<void*>(history);
+    int64_t accum = 0;
+    bool pred;
+    int corrConsd = 0;
+
+    uint32_t corrFoundCurCycle = 0;
+
+    if (bst_table[pc & bstMask].state != NON_BIASED) {
+        pred = (bst_table[pc & bstMask].state == TAKEN);
+    } else {
+        int non_dup_consd = 0;
+        int distantHist = 0;
+
+        // Accumulate bias weight
+        uint32_t bias_widx = gen_bias_widx(pc, wtReg);
+        accum = accum + weight_b[bias_widx];
+
+        // Accumulate weights from the conventional perceptron predictor weight
+        // table for 11 recent unfiltered branches
+        int histPtr = ghrHeadPtr;
+        int phistPtr = pHistHeadPtr;
+        for (int j=0; j <wl1; j++) {
+            uint32_t widx = gen_widx(pc, pathAddr[phistPtr], wtReg);
+            widx = widx ^ folded_hist[j];
+            widx = widx % (1<<wtReg);
+            if ( ghr[histPtr] == 1)
+                accum += weight_m[widx][j];
+            else
+                accum -= weight_m[widx][j];
+
+            history->idxConsd[corrFoundCurCycle] = widx;
+            history->dirConsd[corrFoundCurCycle] = ghr[histPtr];
+            corrFoundCurCycle++;
+
+            histPtr++;
+            if (histPtr == nFoldedHist) histPtr = 0;
+            phistPtr++;
+            if (phistPtr == pHist) phistPtr = 0;
+        }
+
+        // Accumulate weights from weight_rs table for Non_biased branches that
+        // are at absolute distance 12 to distance 32 in the past global
+        // history. This is done to boost accuracy
+        histPtr = (ghrHeadPtr+wl1)%nFoldedHist;
+        phistPtr = (pHistHeadPtr+wl1)%pHist;
+        for (int j=wl1; j<removeDupDist; j++) {
+            if ((pathAddr[phistPtr] != 0) && (! isBrStable[phistPtr])){
+                uint32_t widx = gen_widx(pc, pathAddr[phistPtr], wtRs);
+                uint32_t distance = (j+1) ^ ((j+1) / (1<<wtRs));
+                widx = widx ^ (distance);
+                widx = widx % (1<<wtRs);
+                widx = widx ^ folded_hist[j];
+                widx = widx % (1<<wtRs);
+                if (ghr[histPtr] == 1)        // If history is Taken
+                    accum += weight_rs[widx];
+                else                 // If history is Not-Taken
+                    accum -= weight_rs[widx];
+                paDistConsd[distantHist] = j+1;
+
+                history->idxConsd[corrFoundCurCycle] = widx;
+                history->dirConsd[corrFoundCurCycle] = ghr[histPtr];
+                corrFoundCurCycle++;
+
+                distantHist++;
+            }
+            histPtr++;
+            if (histPtr == nFoldedHist) histPtr = 0;
+            phistPtr++;
+            if (phistPtr == pHist) phistPtr = 0;
+
+        }
+
+        // Accumulate weights from weight_rs table for Non_biased branches
+        // (present in the Recency Stack) that are at absolute distance above
+        // 32 and below 1024 in the past global history
+        for (int j=0; ((distantHist < wl2) && (j<wl2)); j++, corrConsd++) {
+            int pathPCDist = nonDupFilteredHADist1[j];
+            if ((nonDupFilteredHA1[j] != 0) && (pathPCDist < 1024)){
+                uint32_t widx = gen_widx(pc, nonDupFilteredHA1[j], wtRs);
+                uint32_t distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
+                widx = widx ^ (distance);
+                widx = widx % (1<<wtRs);
+
+                if ((pathPCDist-1) < nFoldedHist) {
+                    widx = widx ^ folded_hist[pathPCDist-1];
+                    widx = widx % (1<<wtRs);
+                } else {
+                    widx = widx ^ folded_hist[nFoldedHist-1];
+                    widx = widx % (1<<wtRs);
+                }
+
+                if (nonDupFilteredGHR1[j] == 1) // If history is Taken
+                    accum += weight_rs[widx];
+                else                            // If history is Not-Taken
+                    accum -= weight_rs[widx];
+
+                paDistConsd[distantHist] = pathPCDist;
+
+                history->idxConsd[corrFoundCurCycle] = widx;
+                history->dirConsd[corrFoundCurCycle] = nonDupFilteredGHR1[j];
+                corrFoundCurCycle++;
+
+                distantHist++;
+            } else {
+                break;
+            }
+        }
+
+        non_dup_consd = 0;
+        int prevPathPCDist = 0;
+
+        // On few traces, a small set of branches executes repeatedly in the
+        // past history, as a result Recency Stack can not provide that many
+        // branches to the prediction function since it contains only the
+        // latest occurrence of a branch. The following For loop with the help
+        // of {filteredHA, filteredHADist, filteredGhr} and histLen array
+        // attempts to capture multiple instances of a branch in different
+        // distances in the past history to fill in that room and assists the
+        // prediction function
+        // Accumulate weights from weight_rs table for multiple instances of
+        // Non_biased branches present in different distances (at absolute
+        // distance of at least 64) present in the filtered history
+        int filteredHistPtr = filteredGhrHeadPtr;
+        for (int j=0; ((distantHist < wl2) && (j < fHistLen)); j++) {
+            int pathPCDist = filteredHADist[filteredHistPtr];
+            if ((pathPCDist > extraPHistStartDist) &&
+                (filteredHA[filteredHistPtr] != 0) &&
+                (pathPCDist < extraPHistEndDist)) {
+
+                int prevPathPCBin = 0;
+                int curPathPCBin = 0;
+                for (int it1 = 0; it1 < (histLen.size()-1); it1++) {
+                    if ((pathPCDist >= histLen[it1]) &&
+                        (pathPCDist < histLen[it1+1])) {
+                        curPathPCBin = it1+1;
+                    }
+                    if ((prevPathPCDist >= histLen[it1]) &&
+                        (prevPathPCDist < histLen[it1+1])) {
+                        prevPathPCBin = it1+1;
+                    }
+                }
+                if (prevPathPCBin != curPathPCBin) {
+                    non_dup_consd = 0;
+                }
+                prevPathPCDist = pathPCDist;
+
+                uint32_t widx = 0;
+                uint32_t distance;
+                bool considered = false;
+                if (pathPCDist > removeDupDist) {
+                    for (int it2 = 0;
+                         ((it2 < non_dup_consd) && (it2 < pHistSet));
+                         it2++)
+                    {
+                        if (paFound[it2] == filteredHA[filteredHistPtr]) {
+                            considered = true;
+                            break;
+                        }
+                    }
+                    if (! considered) {
+                        if (non_dup_consd < pHistSet) {
+                            paFound[non_dup_consd]=filteredHA[filteredHistPtr];
+                            non_dup_consd++;
+                        }
+                    }
+                }
+                // Ensures that particular history instance was not there in
+                // the Recency Stack in last for loop
+                for (int it2 = 0; it2 < distantHist; it2++) {
+                    if (paDistConsd[it2] == pathPCDist) {
+                        considered = true;
+                        break;
+                    }
+                }
+                if (! considered) {
+                    paDistConsd[distantHist] = pathPCDist;
+                    distantHist++;
+                }
+                if (! considered) {
+                    widx = gen_widx(pc, filteredHA[filteredHistPtr], wtRs);
+                    distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
+                    widx = widx ^ (distance);
+                    widx = widx % (1<<wtRs);
+                    if ((pathPCDist-1) < nFoldedHist) {
+                        widx = widx ^ folded_hist[pathPCDist-1];
+                        widx = widx % (1<<wtRs);
+                    }
+                    else {
+                        widx = widx ^ folded_hist[nFoldedHist-1];
+                        widx = widx % (1<<wtRs);
+                    }
+                    if (filteredGhr[filteredHistPtr] == 1) {
+                        // If history is Taken
+                        accum += weight_rs[widx];
+                    } else {
+                        // If history is Not-Taken
+                        accum -= weight_rs[widx];
+                    }
+
+                    history->idxConsd[corrFoundCurCycle] = widx;
+
+                    history->dirConsd[corrFoundCurCycle] =
+                        filteredGhr[filteredHistPtr];
+
+                    corrFoundCurCycle++;
+                }
+            }
+            else if ((filteredHA[filteredHistPtr] == 0) ||
+                     (pathPCDist >= 1024)) {
+                break;
+            }
+            filteredHistPtr++;
+            if (filteredHistPtr == fHistLen) {
+                filteredHistPtr = 0;
+            }
+        }
+
+        // Accumulate weights from weight_rs table for Non_biased branches
+        // (present in the Recency Stack) that are at absolute distance
+        // above 1024 in the past global history
+        for (int j=corrConsd;((distantHist < wl2) && (j<wl2)); j++) {
+            int pathPCDist = nonDupFilteredHADist1[j];
+            if ((nonDupFilteredHA1[j] != 0) &&
+                (pathPCDist >= 1024) &&
+                (pathPCDist <= maxHist)){
+                uint32_t widx = 0;
+                uint32_t distance;
+                widx = gen_widx(pc, nonDupFilteredHA1[j], wtRs);
+                distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
+                widx = widx ^ (distance);
+                widx = widx % (1<<wtRs);
+                if ((pathPCDist-1) < nFoldedHist) {
+                    widx = widx ^ folded_hist[pathPCDist-1];
+                    widx = widx % (1<<wtRs);
+                }
+                else {
+                    widx = widx ^ folded_hist[nFoldedHist-1];
+                    widx = widx % (1<<wtRs);
+                }
+                if (nonDupFilteredGHR1[j] == 1)  // If history is Taken
+                    accum += weight_rs[widx];
+                else                             // If history is Not-Taken
+                    accum -= weight_rs[widx];
+                paDistConsd[distantHist] = pathPCDist;
+
+                history->idxConsd[corrFoundCurCycle] = widx;
+                history->dirConsd[corrFoundCurCycle] = nonDupFilteredGHR1[j];
+                corrFoundCurCycle++;
+
+                distantHist++;
+            }
+            else if ((nonDupFilteredHA1[j] == 0) || (pathPCDist > maxHist)){
+                break;
+            }
+        }
+
+        pred = (accum >= 0);
+
+    }
+
+    history->corrFoundCurCycle = corrFoundCurCycle;
+
+    // true means training needed
+    history->hTrain = (accum>-threshold && accum<threshold);
+
+    history->prcptPred = pred;
+
+    // This is only used by the loop predictor at update time
+    history->loopPredictor.predTaken = pred;
+
+    // note that we are shifting the PC by instShiftAmt instead of passing
+    // instShiftAmt as the shift argument
+    // The results are slightly different, and this is needed to match the
+    // original implementation
+    pred = loopPredictor->loopPredict(tid, branch_addr >> instShiftAmt, true,
+                                      &(history->loopPredictor), pred, 0);
+
+    history->predDir = pred;
+
+    DPRINTF(BFNP, "Prediction results for pc 0x%lx => pred %d, "
+        "loopPredUsed: %d, prcptPred: %d, hTrain: %d, accum: %d, "
+        "corrFoundCurCycle: %d\n",
+        branch_addr, pred, history->loopPredictor.loopPredUsed,
+        history->prcptPred, history->hTrain, accum, corrFoundCurCycle);
+
+    return pred;
+}
+
+void
+BFNP_32KB::btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history)
+{
+    // No speculation supported as of now, so do nothing
+    DPRINTF(BFNP, "Ingnoring btbUpdate for pc 0x%lx\n", branch_addr);
+}
+
+void
+BFNP_32KB::update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
+                  bool squashed, const StaticInstPtr & inst, Addr corrTarget)
+{
+    assert(bp_history);
+
+    BFNPHistory *history = static_cast<BFNPHistory*>(bp_history);
+
+    if (squashed) {
+        // No speculation supported, so do nothing
+        return;
+    }
+
+    if (history->isUncondBranch) {
+        delete history;
+        return;
+    }
+
+    // update stats
+    if (history->loopPredictor.loopPredUsed) {
+        loopPredictor->updateStats(taken, &(history->loopPredictor));
+    } else if (history->prcptPred == taken) {
+        perceptronCorrect++;
+    } else {
+        perceptronWrong++;
+    }
+
+    loopPredictor->condBranchUpdate(tid, branch_addr >> instShiftAmt,
+        taken, history->prcptPred, &(history->loopPredictor), 0,
+        getRandom(), getRandom(), getRandom());
+
+    Addr pc = branch_addr >> instShiftAmt;
+
+    bool t = taken;
+
+    int corrConsd = 0;
+
+    if (bst_table[pc & bstMask].state == NOT_FOUND) {
+        bst_table[pc & bstMask].state = taken ? TAKEN : NOT_TAKEN;
+    }
+    else if (((bst_table[pc & bstMask].state == TAKEN) ||
+              (bst_table[pc & bstMask].state == NOT_TAKEN)) &&
+             (t != ((bst_table[pc & bstMask].state == TAKEN)))) {
+        // Detect the current branch as Non-biased
+        bst_table[pc & bstMask].state = NON_BIASED;
+
+        // In the update phase, the predictor table indexes have been computed
+        // in the same way as done in the prediction procedure before
+        uint32_t bias_widx = gen_bias_widx(pc, wtReg);
+        if (t == 1) {
+            if (weight_b[bias_widx] < 31) weight_b[bias_widx]++;
+        } else {
+            if (weight_b[bias_widx] > -32) weight_b[bias_widx]--;
+        }
+
+        int non_dup_consd = 0;
+        int distantHist = 0;
+
+        int histPtr = ghrHeadPtr;
+        int phistPtr = pHistHeadPtr;
+        for (int j=0; j < wl1; j++) {
+            uint32_t widx = gen_widx(pc, pathAddr[phistPtr], wtReg);
+            widx = widx ^ folded_hist[j];
+            widx = widx % (1<<wtReg);
+            if (t == ghr[histPtr]) {
+                if (weight_m[widx][j]<31) weight_m[widx][j]++;
+            } else if (t != ghr[histPtr]) {
+                if (weight_m[widx][j]>-32) weight_m[widx][j]--;
+            }
+
+            histPtr++;
+            if (histPtr == nFoldedHist) histPtr = 0;
+            phistPtr++;
+            if (phistPtr == pHist) phistPtr = 0;
+
+        }
+
+        histPtr = (ghrHeadPtr+wl1)%nFoldedHist;
+        phistPtr = (pHistHeadPtr+wl1)%pHist;
+        for (int j=wl1;j<removeDupDist; j++) {
+            if ((pathAddr[phistPtr] != 0) && (! isBrStable[phistPtr])){
+                uint32_t widx = gen_widx(pc, pathAddr[phistPtr], wtRs);
+                uint32_t distance = (j+1) ^ ((j+1) / (1<<wtRs));
+                widx = widx ^ (distance);
+                widx = widx % (1<<wtRs);
+                widx = widx ^ folded_hist[j];
+                widx = widx % (1<<wtRs);
+                if (t == ghr[histPtr]) {
+                    if (weight_rs[widx]<15) weight_rs[widx]++;
+                } else if (t != ghr[histPtr]) {
+                    if (weight_rs[widx]>-16) weight_rs[widx]--;
+                }
+                paDistConsd[distantHist] = j+1;
+                distantHist++;
+            }
+            histPtr++;
+            if (histPtr == nFoldedHist) histPtr = 0;
+            phistPtr++;
+            if (phistPtr == pHist) phistPtr = 0;
+
+        }
+
+        for (int j=0;((distantHist < wl2) && (j<wl2)); j++, corrConsd++) {
+            int pathPCDist = nonDupFilteredHADist1[j];
+            if ((nonDupFilteredHA1[j] != 0) && (pathPCDist < 1024)) {
+                uint32_t widx = gen_widx(pc, nonDupFilteredHA1[j], wtRs);
+                uint32_t distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
+                widx = widx ^ (distance);
+                widx = widx % (1<<wtRs);
+                if ((pathPCDist-1) < nFoldedHist) {
+                    widx = widx ^ folded_hist[pathPCDist-1];
+                    widx = widx % (1<<wtRs);
+                } else {
+                    widx = widx ^ folded_hist[nFoldedHist-1];
+                    widx = widx % (1<<wtRs);
+                }
+                if (t == nonDupFilteredGHR1[j]) {
+                    if (weight_rs[widx]<15) weight_rs[widx]++;
+                } else if (t != nonDupFilteredGHR1[j]) {
+                    if (weight_rs[widx]>-16) weight_rs[widx]--;
+                }
+                paDistConsd[distantHist] = pathPCDist;
+                distantHist++;
+            } else {
+                break;
+            }
+        }
+
+        non_dup_consd = 0;
+        int prevPathPCDist = 0;
+        int filteredHistPtr = filteredGhrHeadPtr;
+
+        for (int j=0; ((distantHist < wl2) && (j < fHistLen)); j++) {
+            int pathPCDist = filteredHADist[filteredHistPtr];
+            if ((pathPCDist > extraPHistStartDist) &&
+                (filteredHA[filteredHistPtr] != 0) &&
+                (pathPCDist < extraPHistEndDist)){
+
+                int prevPathPCBin = 0;
+                int curPathPCBin = 0;
+                for (int it1 = 0; it1 < (histLen.size()-1); it1++) {
+                    if ((pathPCDist >= histLen[it1]) &&
+                        (pathPCDist < histLen[it1+1])) {
+                        curPathPCBin = it1+1;
+                    }
+                    if ((prevPathPCDist >= histLen[it1]) &&
+                        (prevPathPCDist < histLen[it1+1])) {
+                        prevPathPCBin = it1+1;
+                    }
+                }
+                if (prevPathPCBin != curPathPCBin) {
+                    non_dup_consd = 0;
+                }
+                prevPathPCDist = pathPCDist;
+
+                uint32_t widx = 0;
+                uint32_t distance;
+                bool considered = false;
+                if (pathPCDist > removeDupDist) {
+                    for (int it2 = 0;
+                         ((it2 < non_dup_consd) && (it2 < pHistSet));
+                         it2++)
+                    {
+                        if (paFound[it2] == filteredHA[filteredHistPtr]) {
+                            considered = true;
+                            break;
+                        }
+                    }
+                    if (! considered) {
+                        if (non_dup_consd < pHistSet) {
+                            paFound[non_dup_consd]=filteredHA[filteredHistPtr];
+                            non_dup_consd++;
+                        }
+                    }
+                }
+
+                for (int it2 = 0; it2 <distantHist; it2++) {
+                    if (paDistConsd[it2] == pathPCDist) {
+                        considered = true;
+                        break;
+                    }
+                }
+                if (! considered) {
+                    paDistConsd[distantHist] = pathPCDist;
+                    distantHist++;
+                }
+                if (! considered) {
+                    widx = gen_widx(pc, filteredHA[filteredHistPtr], wtRs);
+                    distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
+                    widx = widx ^ (distance);
+                    widx = widx % (1<<wtRs);
+                    if ((pathPCDist-1) < nFoldedHist) {
+                        widx = widx ^ folded_hist[pathPCDist-1];
+                        widx = widx % (1<<wtRs);
+                    } else {
+                        widx = widx ^ folded_hist[nFoldedHist-1];
+                        widx = widx % (1<<wtRs);
+                    }
+                    if (t == filteredGhr[filteredHistPtr]) {
+                        if (weight_rs[widx]<15) weight_rs[widx]++;
+                    } else if (t != filteredGhr[filteredHistPtr]) {
+                        if (weight_rs[widx]>-16) weight_rs[widx]--;
+                    }
+                }
+            } else if ((filteredHA[filteredHistPtr] == 0) ||
+                       (pathPCDist >= 1024)) {
+                break;
+            }
+            filteredHistPtr++;
+            if (filteredHistPtr == fHistLen) filteredHistPtr = 0;
+
+        }
+
+        for (int j=corrConsd;((distantHist < wl2) && (j<wl2)); j++) {
+            int pathPCDist = nonDupFilteredHADist1[j];
+            if ((nonDupFilteredHA1[j] != 0) && (pathPCDist >= 1024) &&
+                (pathPCDist <= maxHist)) {
+                uint32_t widx = gen_widx(pc, nonDupFilteredHA1[j], wtRs);
+                uint32_t distance = pathPCDist ^ (pathPCDist / (1<<wtRs));
+                widx = widx ^ (distance);
+                widx = widx % (1<<wtRs);
+                if ((pathPCDist-1) < nFoldedHist) {
+                    widx = widx ^ folded_hist[pathPCDist-1];
+                    widx = widx % (1<<wtRs);
+                } else {
+                    widx = widx ^ folded_hist[nFoldedHist-1];
+                    widx = widx % (1<<wtRs);
+                }
+                if (t == nonDupFilteredGHR1[j]) {
+                    if (weight_rs[widx]<15) weight_rs[widx]++;
+                } else if (t != nonDupFilteredGHR1[j]) {
+                    if (weight_rs[widx]>-16) weight_rs[widx]--;
+                }
+                paDistConsd[distantHist] = pathPCDist;
+                distantHist++;
+            } else if ((nonDupFilteredHA1[j] == 0) || (pathPCDist > maxHist)) {
+                break;
+            }
+        }
+    } else if (((bst_table[pc & bstMask].state == TAKEN) ||
+                (bst_table[pc & bstMask].state == NOT_TAKEN)) &&
+               (t == ((bst_table[pc & bstMask].state == TAKEN)))) {
+        // nothing
+    } else if ( (t != history->predDir) || history->hTrain) {
+        //Training needed if threshold not exceeded or predict wrong
+        uint32_t bias_widx = gen_bias_widx(pc, wtReg);
+        if (t == 1) {
+            if (weight_b[bias_widx] < 31)  weight_b[bias_widx]++;
+        } else {
+            if (weight_b[bias_widx] > -32) weight_b[bias_widx]--;
+        }
+
+        for (int j = 0; j < wl1; j++) {
+            if (t == history->dirConsd[j]) {
+                if (weight_m[history->idxConsd[j]][j]<31) {
+                    weight_m[history->idxConsd[j]][j]++;
+                }
+            } else if (t != history->dirConsd[j]) {
+                if (weight_m[history->idxConsd[j]][j]>-32) {
+                    weight_m[history->idxConsd[j]][j]--;
+                }
+            }
+        }
+        for (int j = wl1; j < history->corrFoundCurCycle; j++) {
+            if (t == history->dirConsd[j]) {
+                if (weight_rs[history->idxConsd[j]]<15) {
+                    weight_rs[history->idxConsd[j]]++;
+                }
+            } else if (t != history->dirConsd[j]) {
+                if (weight_rs[history->idxConsd[j]]>-16) {
+                    weight_rs[history->idxConsd[j]]--;
+                }
+            }
+        }
+    }
+
+    // threshold adjusting
+    if (bst_table[pc & bstMask].state == NON_BIASED) {
+        if (t != history->predDir) {
+            thresholdCounter++;
+            if (thresholdCounter==63) {
+                thresholdCounter = 0;
+                threshold++;
+            }
+        } else if ((t==history->predDir) && history->hTrain) {
+            thresholdCounter--;
+            if (thresholdCounter==-63) {
+                thresholdCounter = 0;
+                threshold--;
+            }
+        }
+    }
+
+    // Compute folded history in each bit position up to nFoldedHist bits based
+    // upon the folded history already computed before in the last cycle
+    int histPtr = ghrHeadPtr;
+    for (int hiter = 0; hiter < wl1; hiter++) {
+        folded_hist[hiter] = (folded_hist[hiter] << 1) | (taken?1:0);
+        folded_hist[hiter] ^= ((ghr[histPtr]?1:0) << outPoint[hiter]);
+        folded_hist[hiter] ^= (folded_hist[hiter] >> wtReg);
+        folded_hist[hiter] &= (1 << wtReg) - 1;
+
+        histPtr++;
+        if (histPtr == nFoldedHist) histPtr = 0;
+
+    }
+    histPtr = (ghrHeadPtr+wl1)%nFoldedHist;
+    for (int hiter = wl1; hiter < nFoldedHist; hiter++) {
+        folded_hist[hiter] = (folded_hist[hiter] << 1) | (taken?1:0);
+        folded_hist[hiter] ^= ((ghr[histPtr]?1:0) << outPoint[hiter]);
+        folded_hist[hiter] ^= (folded_hist[hiter] >> wtRs);
+        folded_hist[hiter] &= (1 << wtRs) - 1;
+
+        histPtr++;
+        if (histPtr == nFoldedHist) histPtr = 0;
+
+    }
+
+    // Is the current branch a "completely biased or stable" branch?
+    bool isStableBranch = (bst_table[pc & bstMask].state != NON_BIASED);
+
+    // Update unfiltered GHR and path history
+    ghrHeadPtr--;
+    if (ghrHeadPtr == -1) ghrHeadPtr = nFoldedHist-1;
+    ghr[ghrHeadPtr] = taken;
+    pHistHeadPtr--;
+    if (pHistHeadPtr == -1) pHistHeadPtr = pHist-1;
+    pathAddr[pHistHeadPtr] = pc;
+    isBrStable[pHistHeadPtr] = isStableBranch;
+
+    // Update filtered history used to boost accuracy and provide multiple
+    // instances of a branch to the prediction function if required
+    for (int j=0; j< fHistLen; j++) {
+        if (filteredHA[j] != 0) filteredHADist[j]++;
+    }
+
+    if (numCondBr >= extraPHistStartDist) {
+        if (! isBrStable[(pHistHeadPtr+extraPHistStartDist)%pHist]) {
+            filteredGhrHeadPtr--;
+            if (filteredGhrHeadPtr == -1) filteredGhrHeadPtr = fHistLen-1;
+
+            filteredGhr[filteredGhrHeadPtr] =
+                ghr[(ghrHeadPtr+extraPHistStartDist)%nFoldedHist];
+
+            filteredHA[filteredGhrHeadPtr] =
+                pathAddr[(pHistHeadPtr+extraPHistStartDist)%pHist];
+
+            filteredHADist[filteredGhrHeadPtr] = extraPHistStartDist+1;
+        }
+    }
+
+    // Update the absolute distance of the latest occurrence of the branches
+    // present in Recency Stack (RS)
+    for (int j=0; j<wl2; j++) {
+        if (nonDupFilteredHA1[j] != 0) nonDupFilteredHADist1[j]++;
+    }
+    // If the current branch is Non-biased, insert that into the
+    // Recency Stack (RS)
+    if (numCondBr >= removeDupDist) {
+        if (! isBrStable[(pHistHeadPtr+removeDupDist)%pHist]) {
+            int j = 0;
+            for (j=0; j<wl2; j++) {
+                const uint32_t idx = (pHistHeadPtr+removeDupDist)%pHist;
+                if (pathAddr[idx] == nonDupFilteredHA1[j]) {
+                    //Find if a prior occurrence is already present in RS
+                    j++;
+                    break;
+                }
+            }
+            for (int k = j-1; k>0; k--) {
+                nonDupFilteredGHR1[k] = nonDupFilteredGHR1[k-1];
+                nonDupFilteredHA1[k]=nonDupFilteredHA1[k-1];
+                nonDupFilteredHADist1[k]=nonDupFilteredHADist1[k-1];
+            }
+            nonDupFilteredGHR1[0]=ghr[(ghrHeadPtr+removeDupDist)%nFoldedHist];
+            nonDupFilteredHA1[0]=pathAddr[(pHistHeadPtr+removeDupDist)%pHist];
+            nonDupFilteredHADist1[0]=removeDupDist + 1;
+        }
+    }
+
+    numCondBr++;
+
+    delete history;
+}
+
+void
+BFNP_32KB::squash(ThreadID tid, void *bp_history)
+{
+    BFNPHistory * history = static_cast<BFNPHistory*>(bp_history);
+    // No speculation supported, so nothing needs to be restored
+    delete history;
+}
+
+/*----- Hash function that calculating index of bias table -----*/
+uint32_t
+BFNP_32KB::gen_bias_widx(Addr cur_pc, uint32_t wt_size)
+{
+   cur_pc = (cur_pc ) ^ (cur_pc / (1<<wt_size));
+   uint32_t widx = cur_pc;
+   widx = widx % (1<<wt_size);
+   return widx;
+}
+
+// up-down saturating counter
+void
+BFNP_32KB::ctrupdate(int32_t & ctr, bool taken, int nbits)
+{
+    if (taken) {
+        if (ctr < ((1 << (nbits - 1)) - 1))
+            ctr++;
+    } else {
+        if (ctr > -(1 << (nbits - 1)))
+            ctr--;
+    }
+}
+
+// Hash function that calculating index of weight tables
+uint32_t
+BFNP_32KB::gen_widx(Addr cur_pc, Addr path_pc, uint32_t wt_size) const
+{
+    cur_pc = (cur_pc ) ^ (cur_pc / (1<<wt_size));
+    path_pc = path_pc >> paShiftHash;
+    path_pc = (path_pc) ^ (path_pc / (1<<wt_size));
+    uint32_t widx = cur_pc ^ (path_pc);
+    widx = widx % (1<<wt_size);
+    return widx;
+}
+
+BFNP_32KB*
+BFNP_32KBParams::create()
+{
+    return new BFNP_32KB(this);
+}
+
+bool
+BFNP_32KB_LoopPredictor::optionalAgeInc(int nrand) const
+{
+    return (nrand & 7) == 0;
+}
+
+int
+BFNP_32KB_LoopPredictor::lindex(Addr pc_in, unsigned instShiftAmt) const
+{
+    return (((pc_in >> instShiftAmt) & loopSetMask) << logLoopTableAssoc);
+}
+
+BFNP_32KB_LoopPredictor*
+BFNP_32KB_LoopPredictorParams::create()
+{
+    return new BFNP_32KB_LoopPredictor(this);
+}
+
diff --git a/src/cpu/pred/bfnp_32KB.hh b/src/cpu/pred/bfnp_32KB.hh
new file mode 100644
index 0000000..1af68d0
--- /dev/null
+++ b/src/cpu/pred/bfnp_32KB.hh
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Pau Cabre
+ *
+ */
+
+/*
+ * 32KB Bias-Free Neural Predictor (devised by Dibakar Gope and Mikko Lipasti)
+ *
+ * Most of the code in this file has been adapted from cbp32KB/predictor.h in
+ * https://www.jilp.org/cbp2014/code/DibakarGope.tar.gz
+ */
+
+#ifndef __CPU_PRED_BFNP_32KB_HH
+#define __CPU_PRED_BFNP_32KB_HH
+
+#include <vector>
+
+#include "base/types.hh"
+#include "cpu/pred/bpred_unit.hh"
+#include "cpu/pred/loop_predictor.hh"
+#include "params/BFNP_32KB.hh"
+#include "params/BFNP_32KB_LoopPredictor.hh"
+
/**
 * Loop predictor variant used by the BFNP_32KB predictor.  It overrides
 * the optional age-increment policy and the loop-table indexing of the
 * base LoopPredictor (this predictor uses hashing, but not in all cases).
 */
class BFNP_32KB_LoopPredictor : public LoopPredictor {
  public:
    BFNP_32KB_LoopPredictor(BFNP_32KB_LoopPredictorParams *p)
      : LoopPredictor(p)
    {}

    /** Returns whether the loop-entry age should be incremented for the
     *  given random value (one out of every eight values). */
    bool optionalAgeInc(int nrand) const override;

    /** Computes the index of the first entry of the set mapped by pc_in. */
    int lindex(Addr pc_in, unsigned instShiftAmt) const override;
};
+
/**
 * 32KB Bias-Free Neural branch predictor (Gope & Lipasti).  Completely
 * biased (always taken / always not-taken) branches are tracked in a
 * branch status table and filtered out of the histories, so the neural
 * weights are trained only on non-biased branches.
 */
class BFNP_32KB : public BPredUnit
{
  public:
    /**
     * Default branch predictor constructor.
     */
    BFNP_32KB(const BFNP_32KBParams *params);

    void uncondBranch(ThreadID tid, Addr pc, void * &bp_history) override;

    /**
     * Looks up the given address in the branch predictor and returns
     * a true/false value as to whether it is taken.
     * @param branch_addr The address of the branch to look up.
     * @param bp_history Pointer to any bp history state.
     * @return Whether or not the branch is taken.
     */
    bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history) override;

    /**
     * Updates the branch predictor to Not Taken if a BTB entry is
     * invalid or not found.
     * @param branch_addr The address of the branch to look up.
     * @param bp_history Pointer to any bp history state.
     * @return Whether or not the branch is taken.
     */
    void btbUpdate(ThreadID tid, Addr branch_addr,
                   void * &bp_history) override;

    /**
     * Updates the branch predictor with the actual result of a branch.
     * @param branch_addr The address of the branch to update.
     * @param taken Whether or not the branch was taken.
     */
    void update(ThreadID tid, Addr branch_addr, bool taken,
                void *bp_history, bool squashed, const StaticInstPtr & inst,
                Addr corrTarget) override;

    void squash(ThreadID tid, void *bp_history) override;

    void regStats() override;

  private:

    /** The loop predictor object */
    LoopPredictor *loopPredictor;

    /** Per-branch state carried from prediction time to update time. */
    struct BFNPHistory
    {
        LoopPredictor::BranchInfo loopPredictor;

        // Weight-table indices and recorded directions of the history
        // positions considered when this prediction was computed.
        std::vector<uint32_t> idxConsd;
        std::vector<bool> dirConsd;

        // Number of correlations found for the current prediction.
        uint32_t corrFoundCurCycle;

        bool hTrain;  // training needed because threshold was not exceeded
        bool prcptPred;
        bool predDir; // final predicted direction
        bool isUncondBranch;

        BFNPHistory(uint32_t size, bool isUncondBranch = false)
          : loopPredictor(),
            isUncondBranch(isUncondBranch)
        {
            corrFoundCurCycle = 0;
            idxConsd.resize(size);
            dirConsd.resize(size);
        }
    };

    /** Bias state of a branch, as tracked by the branch status table. */
    enum BST_STATE {
        NOT_FOUND,
        TAKEN,
        NOT_TAKEN,
        NON_BIASED
    };

    struct bst_entry
    {
        BST_STATE state;
        bst_entry()
          : state(NOT_FOUND)
        {
        }
    };

    bst_entry * bst_table; // branch status table

    /* --- weight tables --- */
    int32_t *weight_b;  // 1-dimensional bias weight table
    int32_t **weight_m; // 2-dimensional conventional perceptron predictor
                        // weight table
    int32_t *weight_rs; // 1-dimensional weight table as proposed in the
                        // writeup

    Addr *pathAddr; // Path address register containing the address of past
                    // branches without any filtering
                    // It was 32-bits in the original implementation, and
                    // it has been extended to 64-bits here

    uint32_t *folded_hist; // folded history array; the n-th entry in this
                           // array holds the folded history for the history
                           // bits from the n-th position in the global
                           // unfiltered history to the current branch;
                           // Please note that the space occupied by this
                           // small folded_hist array is NOT included in the
                           // storage budget, as it can be easily computed
                           // from the GHR register during the prediction
                           // computation for each and every bit position;
                           // storing the folded history in the folded_hist
                           // array simply speeds up the simulation.

    bool     *ghr;         // Unfiltered history shift register used for
                           // prediction and updates

    bool *isBrStable;      // A shift register/circular buffer indicating if
                           // a branch in PA was stable the time it was
                           // inserted into that

    uint32_t *paDistConsd; // Set containing the absolute distances of
                           // branches, used later in the code to remove
                           // duplicate instances of a branch in the
                           // prediction computation

    uint32_t *paFound;     // Set containing some path addresses used later
                           // in the code to remove duplicate instances of a
                           // branch in the prediction computation

    // Following three variables are the three fields in the Recency Stack
    // (RS) defined in the paper
    bool *nonDupFilteredGHR1;        // used for RS[].H to contain the
                                     // latest outcome of a branch
    Addr *nonDupFilteredHA1;         // used for RS[].A to contain branch
                                     // address/tag (hashed down to required
                                     // number of bits)
    uint32_t *nonDupFilteredHADist1; // used for RS[].P to contain absolute
                                     // distance of the latest occurrence of
                                     // a branch in global history

    bool *filteredGhr;        // outcome of the branch in filtered history
    uint32_t *filteredHADist; // absolute distance/depth of the branch in
                              // the global history
    uint32_t *filteredHA;     // address of the branch in filtered history

    uint32_t *outPoint;

    int32_t ghrHeadPtr;         // pointer to the start of the circular
                                // buffer GHR
    int32_t pHistHeadPtr;       // pointer to the start of the circular
                                // buffer PA and isBrStable
    int32_t filteredGhrHeadPtr; // pointer to the start of the circular
                                // buffer filteredGHR, filteredGHRHA and
                                // filteredGHRDist

    uint32_t numCondBr; // counter to count number of retired cond. branches

    const uint32_t logBst;
    const uint32_t wtReg;
    const uint32_t wl1;
    const uint32_t wtRs;
    const uint32_t wl2;
    const uint32_t paShiftHash;
    const uint32_t pHist;
    const uint32_t nFoldedHist;
    const uint32_t removeDupDist;

    // size of a set used to remove duplicate instances in filtered
    // histories {filteredGhr, filteredHA, filteredHADist}
    const uint32_t pHistSet;

    // tracks filtered histories that appeared at least at depth of
    // extraPHistStartDist in the global history; used for boosting the
    // accuracy
    const uint32_t fHistLen;

    const uint32_t extraPHistStartDist;
    const uint32_t extraPHistEndDist;

    // histLen array is used in conjunction with filtered history as defined
    // below in the code and is used to boost accuracy and provide multiple
    // instances of a branch present in the past global history to the
    // prediction function if required
    const std::vector<uint32_t> histLen;

    const uint32_t maxHist;

    int32_t threshold;        // dynamic threshold value as in O-GEHL
    int32_t thresholdCounter; // threshold counter as in O-GEHL

    const uint32_t bstMask;

    /** Hash function computing the index into the bias weight table. */
    static uint32_t gen_bias_widx(Addr cur_pc, uint32_t wt_size);

    /** Up/down saturating counter update. */
    static void ctrupdate(int32_t & ctr, bool taken, int nbits);

    /** Hash function computing the index into the weight tables. */
    uint32_t gen_widx(Addr cur_pc, Addr path_pc, uint32_t wt_size) const;

    // stats
    Stats::Scalar perceptronCorrect;
    Stats::Scalar perceptronWrong;
};
+
+#endif // __CPU_PRED_BFNP_32KB_HH
diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc
index cdfa9c8..0baa597 100644
--- a/src/cpu/pred/bpred_unit.cc
+++ b/src/cpu/pred/bpred_unit.cc
@@ -49,6 +49,7 @@
 #include "arch/isa_traits.hh"
 #include "arch/types.hh"
 #include "arch/utility.hh"
+#include "base/random.hh"
 #include "base/trace.hh"
 #include "config/the_isa.hh"
 #include "debug/Branch.hh"
@@ -492,6 +493,12 @@
     }
 }

int
BPredUnit::getRandom() const
{
    // Base implementation simply draws from gem5's global random_mt
    // generator; derived predictors may provide a more realistic source
    // (see the declaration in bpred_unit.hh).
    return random_mt.random<int>();
}
+
 void
 BPredUnit::dump()
 {
diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh
index 7f68cf5..d0b7512 100644
--- a/src/cpu/pred/bpred_unit.hh
+++ b/src/cpu/pred/bpred_unit.hh
@@ -322,6 +322,13 @@
     const unsigned instShiftAmt;

     /**
+     * Algorithm for returning a random number
+     * This base class just uses random_mt, but some derived classes
+     * may want to use a more realistic implementation or force some values
+     */
+    int getRandom() const;
+
+    /**
      * @{
      * @name PMU Probe points.
      */
diff --git a/src/cpu/pred/loop_predictor.hh b/src/cpu/pred/loop_predictor.hh
index d55591b..59717a8 100644
--- a/src/cpu/pred/loop_predictor.hh
+++ b/src/cpu/pred/loop_predictor.hh
@@ -147,7 +147,7 @@
      * @param pc_in The unshifted branch PC.
      * @param instShiftAmt Shift the pc by as many bits
      */
-    int lindex(Addr pc_in, unsigned instShiftAmt) const;
+    virtual int lindex(Addr pc_in, unsigned instShiftAmt) const;

     /**
      * Computes the index used to access the
diff --git a/src/cpu/pred/tage_base.cc b/src/cpu/pred/tage_base.cc
index 7fba790..499ff8b 100644
--- a/src/cpu/pred/tage_base.cc
+++ b/src/cpu/pred/tage_base.cc
@@ -42,7 +42,6 @@

 #include "base/intmath.hh"
 #include "base/logging.hh"
-#include "base/random.hh"
 #include "base/trace.hh"
 #include "debug/Fetch.hh"
 #include "debug/Tage.hh"
@@ -630,12 +629,6 @@
     return;
 }

-int
-TAGEBase::getRandom() const
-{
-    return random_mt.random<int>();
-}
-
 void
 TAGEBase::updateStats(bool taken, TageBranchInfo* bi)
 {
diff --git a/src/cpu/pred/tage_base.hh b/src/cpu/pred/tage_base.hh
index ebd5053..9cb803e 100644
--- a/src/cpu/pred/tage_base.hh
+++ b/src/cpu/pred/tage_base.hh
@@ -425,13 +425,6 @@
      */
     virtual void extraAltCalc(TageBranchInfo* bi);

-    /**
-     * Algorithm for returning a random number
-     * This base TAGE class just uses random_mt, but some derived classes
-     * may want to use a more realistic implementation or force some values
-     */
-    int getRandom() const;
-
     const unsigned logRatioBiModalHystEntries;
     const unsigned nHistoryTables;
     const unsigned tagTableCounterBits;

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/15956
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: Ia159c5e7430a387aa46f33b0ed1503bc515d9c73
Gerrit-Change-Number: 15956
Gerrit-PatchSet: 1
Gerrit-Owner: Pau Cabre <pau.ca...@metempsy.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to