Ivan Pizarro has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/14820
Change subject: mem-cache: Implementation of the Best Offset Prefetcher
(BOP) described in the paper 'A Best-Offset Prefetcher' by Pierre Michaud
......................................................................
mem-cache: Implementation of the Best Offset Prefetcher (BOP) described in
the paper 'A Best-Offset Prefetcher' by Pierre Michaud
Change-Id: I61bb89ca5639356d54aeb04e856d5bf6e8805c22
---
M src/mem/cache/prefetch/Prefetcher.py
M src/mem/cache/prefetch/SConscript
A src/mem/cache/prefetch/bop.cc
A src/mem/cache/prefetch/bop.hh
4 files changed, 429 insertions(+), 0 deletions(-)
diff --git a/src/mem/cache/prefetch/Prefetcher.py
b/src/mem/cache/prefetch/Prefetcher.py
index bae235d..931e57f 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -137,3 +137,27 @@
cxx_header = "mem/cache/prefetch/tagged.hh"
degree = Param.Int(2, "Number of prefetches to generate")
+
+class BOPPrefetcher(QueuedPrefetcher):
+ type = "BOPPrefetcher"
+ cxx_class = "BOPPrefetcher"
+ cxx_header = "mem/cache/prefetch/bop.hh"
+ score_max = Param.Unsigned(31, "")
+ round_max = Param.Unsigned(100, "")
+ bad_score = Param.Unsigned(1, "")
+ rr_size = Param.Unsigned(64, "Number of entries of each RR bank")
+ tag_bits = Param.Unsigned(12, "Bits used to store the tag")
+ offset_list_size = Param.Unsigned(46,
+ "Number of entries in the offsets list")
+ negative_offsets_enable = Param.Bool(True,
+ "Initialize the offsets list also with negative values \
+ (i.e. the table will have half of the entries with
positive \
+ offsets and the other half with negative ones")
+ delay_queue_enable = Param.Bool(True, "Enable the delay queue")
+ delay_queue_size = Param.Unsigned(15,
+ "Number of entries in the delay queue")
+ delay_queue_cycles = Param.Cycles(60,
+ "Cycles to delay a write in the left RR table from the
delay \
+ queue")
+ replacement_policy = Param.BaseReplacementPolicy(LRURP(), "Repl.
policy")
+ block_size = Param.Int(Parent.cache_line_size, "block size in bytes")
diff --git a/src/mem/cache/prefetch/SConscript
b/src/mem/cache/prefetch/SConscript
index 2665d18..6182510 100644
--- a/src/mem/cache/prefetch/SConscript
+++ b/src/mem/cache/prefetch/SConscript
@@ -36,4 +36,5 @@
Source('queued.cc')
Source('stride.cc')
Source('tagged.cc')
+Source('bop.cc')
diff --git a/src/mem/cache/prefetch/bop.cc b/src/mem/cache/prefetch/bop.cc
new file mode 100644
index 0000000..2e1b00c
--- /dev/null
+++ b/src/mem/cache/prefetch/bop.cc
@@ -0,0 +1,256 @@
+/**
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ivan Pizarro
+ */
+
+#include "mem/cache/prefetch/bop.hh"
+
+#include "debug/HWPrefetch.hh"
+#include "params/BOPPrefetcher.hh"
+
+BOPPrefetcher::BOPPrefetcher(const BOPPrefetcherParams *p)
+ : QueuedPrefetcher(p),
+ score_max(p->score_max), round_max(p->round_max),
+ bad_score(p->bad_score), rr_entries(p->rr_size),
+ tag_mask((1 << p->tag_bits) - 1),
offset_list_size(p->offset_list_size),
+ use_negative_offsets(p->negative_offsets_enable),
+ delay_queue_enabled(p->delay_queue_enable),
+ delay_queue_size(p->delay_queue_size),
+ delay_ticks(cyclesToTicks(p->delay_queue_cycles)),
+ block_size(p->block_size),
+ delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
+ enabled(false), best_offset(1), offset_index(0), best_score(0),
round(0)
+{
+ assert(isPowerOf2(rr_entries));
+
+ rr_left.resize(rr_entries);
+ rr_right.resize(rr_entries);
+
+ phase_best_offset = 0;
+}
+
+void
+BOPPrefetcher::delayQueueEventWrapper()
+{
+ assert(delay_queue.size());
+ Addr x = delay_queue.front().baseaddr;
+ insert_into_rr(x, LEFT);
+ delay_queue.pop_front();
+
+ // Schedule an event for the next element if there is one
+ if (delay_queue.size()) {
+ Tick insert_tick = delay_queue.front().insert_tick;
+ Tick ticks_to_finish = insert_tick + delay_ticks - curTick();
+ schedule(delayQueueEvent, curTick() + ticks_to_finish);
+ }
+}
+
+unsigned int
+BOPPrefetcher::hash(Addr addr, unsigned int way)
+{
+ Addr hash1 = addr >> way;
+ Addr hash2 = hash1 >> floorLog2(rr_entries);
+ return (hash1 ^ hash2) & (Addr)(rr_entries - 1);
+}
+
+void
+BOPPrefetcher::insert_into_rr(Addr addr, unsigned int way)
+{
+ switch (way) {
+ case LEFT:
+ rr_left[hash(addr, LEFT)] = addr;
+ break;
+ case RIGHT:
+ rr_right[hash(addr, RIGHT)] = addr;
+ break;
+ }
+}
+
+void
+BOPPrefetcher::insert_into_delay_queue(Addr x)
+{
+ if (delay_queue.size() == delay_queue_size) {
+ return;
+ }
+
+ // Add the address to the delay queue and schedule an event to process
+ // it after the specified delay cycles
+ delay_queue.push_back(DelayQueueEntry(x));
+
+ if (!delayQueueEvent.scheduled()) {
+ schedule(delayQueueEvent, curTick() + delay_ticks);
+ }
+}
+
+void
+BOPPrefetcher::reset_scores()
+{
+ for (unsigned int i = 0; i < offsets_list.size(); i++) {
+ offsets_list[i].second = 0;
+ }
+}
+
+inline unsigned int
+BOPPrefetcher::tag(Addr addr)
+{
+ return (addr >> block_size) & tag_mask;
+}
+
+bool
+BOPPrefetcher::test_offset(Addr x)
+{
+ Addr offset = offsets_list[offset_index].first;
+ Addr lookup = x - offset;
+
+ for (unsigned int i = 0; i < rr_left.size(); i++) {
+ if (rr_left[i] == lookup) {
+ return true;
+ }
+ }
+
+ for (unsigned int i = 0; i < rr_right.size(); i++) {
+ if (rr_right[i] == lookup) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void
+BOPPrefetcher::best_offset_learning(Addr x)
+{
+ // There was a hit in the RR table, increment the score for this offset
+ if (test_offset(x)) {
+ DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x);
+ offsets_list[offset_index].second++;
+ if (offsets_list[offset_index].second > best_score) {
+ best_score = offsets_list[offset_index].second;
+ phase_best_offset = offsets_list[offset_index].first;
+ DPRINTF(HWPrefetch, "New best score is %lu\n", best_score);
+ }
+ }
+
+ offset_index++;
+
+ // All the offsets in the list were visited meaning that a learning
+ // phase finished. Check if
+ if (offset_index == offsets_list.size()) {
+ offset_index = 0;
+ round++;
+
+ // Check if the best offset must be updated if:
+ // (1) One of the scores equals SCORE_MAX
+ // (2) The number of rounds equals ROUND_MAX
+ if ((best_score >= score_max) || (round == round_max)) {
+ best_offset = phase_best_offset;
+ round = 0;
+ best_score = 0;
+ phase_best_offset = 0;
+ reset_scores();
+ enabled = true;
+ } else if (phase_best_offset <= bad_score) {
+ enabled = false;
+ DPRINTF(HWPrefetch, "Disabling prefetches\n");
+ }
+ }
+}
+
+void
+BOPPrefetcher::init()
+{
+ unsigned int i = 0;
+ int64_t offset_i = 1;
+
+ // Following the paper implementation, a list with the specified number
+ // of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
+ while (i < offset_list_size)
+ {
+ int64_t offset = offset_i;
+
+ for (unsigned n = 2; n <= 5; n++) {
+ while ((offset % n) == 0) {
+ offset /= n;
+ }
+ }
+
+ if (offset == 1) {
+ offsets_list.push_back(OffsetListEntry(offset_i, 0));
+ // If we want to use negative offsets, add also the negative
value
+ // of the offset just calculated
+ if (use_negative_offsets) {
+ offsets_list.push_back(OffsetListEntry(-offset_i, 0));
+ }
+
+ i++;
+ }
+
+ offset_i++;
+ }
+}
+
+void
+BOPPrefetcher::calculatePrefetch(const PacketPtr &pkt,
+ std::vector<AddrPriority> &addresses)
+{
+ Addr line_x = tag(pkt->getAddr());
+
+ if (delay_queue_enabled) {
+ insert_into_delay_queue(line_x);
+ } else {
+ insert_into_rr(line_x, LEFT);
+ }
+
+ // Go through the nth offset and update the score, the best score and
the
+ // current best offset if a better one is found
+ best_offset_learning(line_x);
+
+ // This prefetcher is a degree 1 prefetch, so it will only generate one
+ // prefetch at most per access
+ if (enabled) {
+ addresses.push_back(AddrPriority(line_x + best_offset, 0));
+ DPRINTF(HWPrefetch, "Generated prefetcher %#lx + %#lx\n",
+ line_x, best_offset);
+ }
+}
+
+void
+BOPPrefetcher::notifyCacheFill(const PacketPtr &pkt)
+{
+ Addr line_y = tag(pkt->getAddr());
+
+ if (enabled) {
+ insert_into_rr(line_y - best_offset, RIGHT);
+ }
+}
+
+BOPPrefetcher*
+BOPPrefetcherParams::create()
+{
+ return new BOPPrefetcher(this);
+}
diff --git a/src/mem/cache/prefetch/bop.hh b/src/mem/cache/prefetch/bop.hh
new file mode 100644
index 0000000..3c71472
--- /dev/null
+++ b/src/mem/cache/prefetch/bop.hh
@@ -0,0 +1,148 @@
+/**
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ivan Pizarro
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_BOP_HH__
+#define __MEM_CACHE_PREFETCH_BOP_HH__
+
+#include <queue>
+
+#include "mem/cache/prefetch/queued.hh"
+#include "mem/packet.hh"
+
+#define LEFT 0 // Skewed cache way for left table
+#define RIGHT 1 // Skewed cache way for right table
+
+struct BOPPrefetcherParams;
+
+class BOPPrefetcher : public QueuedPrefetcher
+{
+ private:
+
+ /** Learning phase parameters */
+ const unsigned int score_max;
+ const unsigned int round_max;
+ const unsigned int bad_score;
+ /** Recent requests table parameteres */
+ const unsigned int rr_entries;
+ const unsigned int tag_mask;
+ /** Offsets list parameters */
+ const unsigned int offset_list_size;
+ const bool use_negative_offsets;
+ /** Delay queue parameters */
+ const bool delay_queue_enabled;
+ const unsigned int delay_queue_size;
+ const unsigned int delay_ticks;
+ /** (Parent) Cache parameters */
+ const unsigned int block_size;
+
+ std::vector<Addr> rr_left;
+ std::vector<Addr> rr_right;
+
+ /** Structure to save the offset and the score */
+ typedef std::pair<int64_t, uint8_t> OffsetListEntry;
+ std::vector<OffsetListEntry> offsets_list;
+
+ struct DelayQueueEntry
+ {
+ Addr baseaddr;
+ Tick insert_tick;
+
+ DelayQueueEntry(Addr x) : baseaddr(x), insert_tick(curTick())
+ {}
+ };
+
+ std::deque<DelayQueueEntry> delay_queue;
+
+ /** Event to handle the delay queue processing */
+ void delayQueueEventWrapper();
+ EventFunctionWrapper delayQueueEvent;
+
+ /** Hardware prefetcher enabled */
+ bool enabled;
+ /** Current best offset to issue prefetches */
+ Addr best_offset;
+ /** Current best offset found in the learning phase */
+ Addr phase_best_offset;
+ /** Current test offset index */
+ unsigned int offset_index;
+ /** Max score found so far */
+ unsigned int best_score;
+ /** Number of rounds */
+ unsigned int round;
+
+ /** Generate a hash for the specified address to index the RR table
+ * @param addr: address to hash
+ * @param way: RR table to which is addressed (left/right)
+ */
+ unsigned int hash(Addr addr, unsigned int way);
+
+ /** Insert the specified address into the RR table
+ * @param addr: address to insert
+ * @param way: RR table to which the address will be inserted
+ */
+ void insert_into_rr(Addr addr, unsigned int way);
+
+ /** Insert the specified address into the delay queue. This will
+ * trigger an event after the delay cycles pass
+ * @param addr: address to insert into the delay queue
+ */
+ void insert_into_delay_queue(Addr addr);
+
+ /** Reset all the scores from the offset list */
+ void reset_scores();
+
+ /** Generate the tag for the specified address based on the tag
bits
+ and the block size */
+ unsigned int tag(Addr addr);
+
+ protected:
+
+ /** Test if @X-O is hitting in the RR table to update the
+ offset score */
+ bool test_offset(Addr);
+
+ /** Learning phase of the BOP. Update the intermediate values of
the
+ round and update the best offset if found */
+ void best_offset_learning(Addr);
+
+ /** Update the RR right table after a prefetch fill */
+ void notifyCacheFill(const PacketPtr& pkt);
+
+ public:
+
+ BOPPrefetcher(const BOPPrefetcherParams *p);
+ ~BOPPrefetcher() {}
+
+ void init();
+ void calculatePrefetch(const PacketPtr &pkt,
+ std::vector<AddrPriority> &addresses);
+};
+
+#endif /* __MEM_CACHE_PREFETCH_BOP_HH__ */
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14820
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I61bb89ca5639356d54aeb04e856d5bf6e8805c22
Gerrit-Change-Number: 14820
Gerrit-PatchSet: 1
Gerrit-Owner: Ivan Pizarro <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev