[gem5-dev] Change in gem5/gem5[master]: mem: Add support for partial stores and wide memory accesses

Giacomo Gabrielli (Gerrit) Mon, 15 Oct 2018 09:09:50 -0700

Giacomo Gabrielli has uploaded this change for review. (https://gem5-review.googlesource.com/c/public/gem5/+/13518)


Change subject: mem: Add support for partial stores and wide memory accesses
......................................................................

mem: Add support for partial stores and wide memory accesses

This changeset adds support for partial (or masked) stores, i.e.
stores that can disable writes to individual bytes within the target
address range.  In addition, this changeset extends the code to crack
memory accesses across most CPU models (TimingSimpleCPU still TBD), so
that arbitrarily wide memory accesses are supported.  These changes
are required for supporting ISAs with wide vectors.

Change-Id: Ibad33541c258ad72925c0b1d5abc3e5e8bf92d92
Signed-off-by: Giacomo Gabrielli <[email protected]>
---
M src/cpu/base_dyn_inst.hh
M src/cpu/checker/cpu.cc
M src/cpu/checker/cpu.hh
M src/cpu/exec_context.hh
M src/cpu/minor/exec_context.hh
M src/cpu/minor/lsq.cc
M src/cpu/minor/lsq.hh
M src/cpu/o3/cpu.hh
M src/cpu/o3/lsq.hh
M src/cpu/o3/lsq_impl.hh
M src/cpu/simple/atomic.cc
M src/cpu/simple/atomic.hh
M src/cpu/simple/base.hh
M src/cpu/simple/exec_context.hh
M src/cpu/simple/timing.cc
M src/cpu/simple/timing.hh
A src/cpu/utils.hh
M src/mem/abstract_mem.cc
M src/mem/cache/cache.cc
M src/mem/packet.hh
M src/mem/request.hh
21 files changed, 409 insertions(+), 170 deletions(-)



diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index f5aa83e..7bc88ce 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -301,7 +301,8 @@
     Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags);

     Fault writeMem(uint8_t *data, unsigned size, Addr addr,
-                   Request::Flags flags, uint64_t *res);
+                   Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byteEnable = std::vector<bool>());

     /** True if the DTB address translation has started. */
     bool translationStarted() const { return instFlags[TranslationStarted]; }
@@ -914,11 +915,12 @@
 template<class Impl>
 Fault
 BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
-                            Request::Flags flags, uint64_t *res)
+                            Request::Flags flags, uint64_t *res,
+                            const std::vector<bool>& byteEnable)
 {
     return cpu->pushRequest(
             dynamic_cast<typename DynInstPtr::ptr_type>(this),
-            /* st */ false, data, size, addr, flags, res);
+            /* st */ false, data, size, addr, flags, res, byteEnable);
 }

 #endif // __CPU_BASE_DYN_INST_HH__
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index fe1c3d4..99db59b 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -53,6 +53,7 @@
 #include "cpu/simple_thread.hh"
 #include "cpu/static_inst.hh"
 #include "cpu/thread_context.hh"
+#include "cpu/utils.hh"
 #include "params/CheckerCPU.hh"
 #include "sim/full_system.hh"

@@ -145,20 +146,23 @@
                     Request::Flags flags)
 {
     Fault fault = NoFault;
-    int fullSize = size;
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
     bool checked_flags = false;
     bool flags_match = true;
     Addr pAddr = 0x0;

-
-    if (secondAddr > addr)
-       size = secondAddr - addr;
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;

     // Need to account for multiple accesses like the Atomic and TimingSimple
     while (1) {
+        frag_size = std::min(
+            cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
+            (Addr) size_left);
+        size_left -= frag_size;
+
         auto mem_req = std::make_shared<Request>(
-            0, addr, size, flags, masterId,
+            0, frag_addr, frag_size, flags, masterId,
             thread->pcState().instAddr(), tc->contextId());

         // translate to physical address
@@ -183,7 +187,7 @@
                 dcachePort->sendFunctional(pkt);
             } else {
                 // Assume the data is correct if it's an uncached access
-                memcpy(data, unverifiedMemData, size);
+                memcpy(data, unverifiedMemData, frag_size);
             }

             delete pkt;
@@ -197,22 +201,21 @@
         }

         //If we don't need to access a second cache line, stop now.
-        if (secondAddr <= addr)
+        if (size_left == 0)
         {
             break;
         }

         // Setup for accessing next cache line
-        data += size;
-        unverifiedMemData += size;
-        size = addr + fullSize - secondAddr;
-        addr = secondAddr;
+        frag_addr += frag_size;
+        data += frag_size;
+        unverifiedMemData += frag_size;
     }

     if (!flags_match) {
         warn("%lli: Flags do not match CPU:%#x %#x %#x Checker:%#x %#x %#x\n",
              curTick(), unverifiedReq->getVaddr(), unverifiedReq->getPaddr(),
-             unverifiedReq->getFlags(), addr, pAddr, flags);
+             unverifiedReq->getFlags(), frag_addr, pAddr, flags);
         handleError();
     }

@@ -221,27 +224,40 @@

 Fault
 CheckerCPU::writeMem(uint8_t *data, unsigned size,
-                     Addr addr, Request::Flags flags, uint64_t *res)
+                     Addr addr, Request::Flags flags, uint64_t *res,
+                     const std::vector<bool>& byteEnable)
 {
+    assert(byteEnable.empty() || byteEnable.size() == size);
+
     Fault fault = NoFault;
     bool checked_flags = false;
     bool flags_match = true;
     Addr pAddr = 0x0;
     static uint8_t zero_data[64] = {};

-    int fullSize = size;
-
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
-    if (secondAddr > addr)
-        size = secondAddr - addr;
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;

     // Need to account for a multiple access like Atomic and Timing CPUs
     while (1) {
+        frag_size = std::min(
+            cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
+            (Addr) size_left);
+        size_left -= frag_size;
+
         auto mem_req = std::make_shared<Request>(
-            0, addr, size, flags, masterId,
+            0, frag_addr, frag_size, flags, masterId,
             thread->pcState().instAddr(), tc->contextId());

+        if (!byteEnable.empty()) {
+            // Set up byte-enable mask for the current fragment
+            auto it_start = byteEnable.begin() + (size - (frag_size +
+                                                          size_left));
+            auto it_end = byteEnable.begin() + (size - size_left);
+            mem_req->setWriteByteEnable(std::vector<bool>(it_start, it_end));
+        }
+
         // translate to physical address
         fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Write);

@@ -262,7 +278,7 @@
         bool was_prefetch = mem_req->isPrefetch();

         //If we don't need to access a second cache line, stop now.
-        if (fault != NoFault || secondAddr <= addr)
+        if (fault != NoFault || size_left == 0)
         {
             if (fault != NoFault && was_prefetch) {
               fault = NoFault;
@@ -270,16 +286,13 @@
             break;
         }

-        //Update size and access address
-        size = addr + fullSize - secondAddr;
-        //And access the right address.
-        addr = secondAddr;
+        frag_addr += frag_size;
    }

    if (!flags_match) {
        warn("%lli: Flags do not match CPU:%#x %#x Checker:%#x %#x %#x\n",
             curTick(), unverifiedReq->getVaddr(), unverifiedReq->getPaddr(),
-            unverifiedReq->getFlags(), addr, pAddr, flags);
+            unverifiedReq->getFlags(), frag_addr, pAddr, flags);
        handleError();
    }

@@ -305,12 +318,12 @@
    // const set of zeros.
    if (flags & Request::STORE_NO_DATA) {
        assert(!data);
-       assert(sizeof(zero_data) <= fullSize);
+       assert(sizeof(zero_data) <= size);
        data = zero_data;
    }

    if (unverifiedReq && unverifiedMemData &&
-       memcmp(data, unverifiedMemData, fullSize) && extraData) {
+       memcmp(data, unverifiedMemData, size) && extraData) {
            warn("%lli: Store value does not match value sent to memory! "
                   "data: %#x inst_data: %#x", curTick(), data,
                   unverifiedMemData);
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index 8b2395e..d18305e 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -537,7 +537,9 @@
     Fault readMem(Addr addr, uint8_t *data, unsigned size,
                   Request::Flags flags) override;
     Fault writeMem(uint8_t *data, unsigned size, Addr addr,
-                   Request::Flags flags, uint64_t *res) override;
+                   Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;

     unsigned int readStCondFailures() const override {
         return thread->readStCondFailures();
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index 293e139..400d9ec 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -270,7 +270,9 @@
      * For timing-mode contexts, initiate a timing memory write operation.
      */
     virtual Fault writeMem(uint8_t *data, unsigned int size, Addr addr,
-                           Request::Flags flags, uint64_t *res) = 0;
+                           Request::Flags flags, uint64_t *res,
+                           const std::vector<bool>& byteEnable =
+                               std::vector<bool>()) = 0;

     /**
      * Sets the number of consecutive store conditional failures.
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index bc86276..df66b11 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -114,10 +114,13 @@

     Fault
     writeMem(uint8_t *data, unsigned int size, Addr addr,
-             Request::Flags flags, uint64_t *res) override
+             Request::Flags flags, uint64_t *res,
+             const std::vector<bool>& byteEnable = std::vector<bool>())
+        override
     {
+        assert(byteEnable.empty() || byteEnable.size() == size);
         execute.getLSQ().pushRequest(inst, false /* store */, data,
-            size, addr, flags, res);
+            size, addr, flags, res, byteEnable);
         return NoFault;
     }

diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index ad103b0..7c7c74d 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -48,29 +48,16 @@
 #include "cpu/minor/exec_context.hh"
 #include "cpu/minor/execute.hh"
 #include "cpu/minor/pipeline.hh"
+#include "cpu/utils.hh"
 #include "debug/Activity.hh"
 #include "debug/MinorMem.hh"

 namespace Minor
 {

-/** Returns the offset of addr into an aligned a block of size block_size */
-static Addr
-addrBlockOffset(Addr addr, unsigned int block_size)
-{
-    return addr & (block_size - 1);
-}
-
-/** Returns true if the given [addr .. addr+size-1] transfer needs to be
- *  fragmented across a block size of block_size */
-static bool
-transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
-{
-    return (addrBlockOffset(addr, block_size) + size) > block_size;
-}
-
 LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
-    PacketDataPtr data_, uint64_t *res_) :
+    PacketDataPtr data_, uint64_t *res_,
+    const std::vector<bool>& writeByteEnable_) :
     SenderState(),
     port(port_),
     inst(inst_),
@@ -80,6 +67,7 @@
     request(),
     fault(NoFault),
     res(res_),
+    writeByteEnable(writeByteEnable_),
     skipped(false),
     issuedToMemory(false),
     state(NotIssued)
@@ -321,8 +309,9 @@
 }

 LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
-    bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
-    LSQRequest(port_, inst_, isLoad_, data_, res_),
+    bool isLoad_, PacketDataPtr data_, uint64_t *res_,
+    const std::vector<bool>& writeByteEnable_) :
+    LSQRequest(port_, inst_, isLoad_, data_, res_, writeByteEnable_),
     translationEvent([this]{ sendNextFragmentToTranslation(); },
                      "translationEvent"),
     numFragments(0),
@@ -356,6 +345,8 @@
     unsigned int fragment_size;
     Addr fragment_addr;

+    std::vector<bool> fragment_write_byte_en;
+
     /* Assume that this transfer is across potentially many block snap
      * boundaries:
      *
@@ -400,6 +391,8 @@
     /* Just past the last address in the request */
     Addr end_addr = base_addr + whole_size;

+    auto& writeByteEnable = request->getWriteByteEnable();
+
     for (unsigned int fragment_index = 0; fragment_index < numFragments;
          fragment_index++)
     {
@@ -427,6 +420,15 @@
             request->masterId(),
             request->getPC());

+        if (!writeByteEnable.empty()) {
+            // Set up byte-enable mask for the current fragment
+            auto it_start = writeByteEnable.begin() +
+                (fragment_addr - base_addr);
+            auto it_end = writeByteEnable.begin() +
+                (fragment_addr - base_addr) + fragment_size;
+            fragment->setWriteByteEnable(std::vector<bool>(it_start, it_end));
+        }
+
         DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d"
             " (whole request addr: 0x%x size: %d) %s\n",
             fragment_addr, fragment_size, base_addr, whole_size,
@@ -472,7 +474,8 @@
         assert(fragment->hasPaddr());

         PacketPtr fragment_packet =
-            makePacketForRequest(fragment, isLoad, this, request_data);
+            makePacketForRequest(fragment, isLoad, this, request_data,
+                                 fragment->getWriteByteEnable());

         fragmentPackets.push_back(fragment_packet);
         /* Accumulate flags in parent request */
@@ -1474,7 +1477,7 @@
 void
 LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                  unsigned int size, Addr addr, Request::Flags flags,
-                 uint64_t *res)
+                 uint64_t *res, const std::vector<bool>& writeByteEnable)
 {
     bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
     LSQRequestPtr request;
@@ -1502,10 +1505,10 @@

     if (needs_burst) {
         request = new SplitDataRequest(
-            *this, inst, isLoad, request_data, res);
+            *this, inst, isLoad, request_data, res, writeByteEnable);
     } else {
         request = new SingleDataRequest(
-            *this, inst, isLoad, request_data, res);
+            *this, inst, isLoad, request_data, res, writeByteEnable);
     }

     if (inst->traceData)
@@ -1517,6 +1520,9 @@
         addr, size, flags, cpu.dataMasterId(),
         /* I've no idea why we need the PC, but give it */
         inst->pc.instAddr());
+    if (!writeByteEnable.empty()) {
+        request->request->setWriteByteEnable(writeByteEnable);
+    }

     requests.push(request);
     request->startAddrTranslation();
@@ -1554,8 +1560,10 @@

 PacketPtr
 makePacketForRequest(const RequestPtr &request, bool isLoad,
-    Packet::SenderState *sender_state, PacketDataPtr data)
+    Packet::SenderState *sender_state, PacketDataPtr data,
+    const std::vector<bool>& writeByteEnable)
 {
+    assert(!isLoad || writeByteEnable.empty());
     PacketPtr ret = isLoad ? Packet::createRead(request)
                            : Packet::createWrite(request);

@@ -1598,7 +1606,8 @@
         return;
     }

-    packet = makePacketForRequest(request, isLoad, this, data);
+    packet = makePacketForRequest(request, isLoad, this, data,
+                                  writeByteEnable);
     /* Null the ret data so we know not to deallocate it when the
      * ret is destroyed.  The data now belongs to the ret and
      * the ret is responsible for its destruction */
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index da873b4..66a2dd1 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -151,6 +151,9 @@
         /** Res from pushRequest */
         uint64_t *res;

+        /** Byte-enable mask for writes */
+        std::vector<bool> writeByteEnable;
+
         /** Was skipped.  Set to indicate any reason (faulted, bad
          *  stream sequence number, in a fault shadow) that this
          *  request did not perform a memory transfer */
@@ -190,7 +193,8 @@

       public:
         LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
-            PacketDataPtr data_ = NULL, uint64_t *res_ = NULL);
+            PacketDataPtr data_ = NULL, uint64_t *res_ = NULL,
+            const std::vector<bool>& writeByteEnable_ = std::vector<bool>());

         virtual ~LSQRequest();

@@ -366,8 +370,9 @@

       public:
         SingleDataRequest(LSQ &port_, MinorDynInstPtr inst_,
-            bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL) :
-            LSQRequest(port_, inst_, isLoad_, data_, res_),
+            bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL,
+            const std::vector<bool>& writeByteEnable_ = std::vector<bool>()) :
+            LSQRequest(port_, inst_, isLoad_, data_, res_, writeByteEnable_),
             packetInFlight(false),
             packetSent(false)
         { }
@@ -412,7 +417,8 @@
       public:
         SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
             bool isLoad_, PacketDataPtr data_ = NULL,
-            uint64_t *res_ = NULL);
+            uint64_t *res_ = NULL,
+            const std::vector<bool>& writeByteEnable_ = std::vector<bool>());

         ~SplitDataRequest();

@@ -700,7 +706,9 @@
      *  the LSQ */
     void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                      unsigned int size, Addr addr, Request::Flags flags,
-                     uint64_t *res);
+                     uint64_t *res,
+                     const std::vector<bool>& writeByteEnable =
+                         std::vector<bool>());

     /** Push a predicate failed-representing request into the queues just
      *  to maintain commit order */
@@ -721,7 +729,8 @@
  *  data will be the payload data.  If sender_state is NULL, it won't be
  *  pushed into the packet as senderState */
 PacketPtr makePacketForRequest(const RequestPtr &request, bool isLoad,
-    Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL);
+    Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL,
+    const std::vector<bool>& writeByteEnable = std::vector<bool>());
 }

 #endif /* __CPU_MINOR_NEW_LSQ_HH__ */
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 02da25b..0f204ef 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -762,10 +762,12 @@
     /** CPU pushRequest function, forwards request to LSQ. */
     Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
-                      uint64_t *res)
+                      uint64_t *res,
+                      const std::vector<bool>& writeByteEnable =
+                          std::vector<bool>())
     {
         return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr,
-                flags, res);
+                flags, res, writeByteEnable);
     }

     /** CPU read function, forwards read to LSQ. */
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index a53f6a8..fd2fce6 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -50,6 +50,7 @@
 #include "arch/generic/tlb.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/o3/lsq_unit.hh"
+#include "cpu/utils.hh"
 #include "mem/port.hh"
 #include "sim/sim_object.hh"

@@ -243,6 +244,7 @@
         const Addr _addr;
         const uint32_t _size;
         const Request::Flags _flags;
+        std::vector<bool> _writeByteEnable;
         uint32_t _numOutstandingPackets;
       protected:
         LSQUnit* lsqUnit() { return &_port; }
@@ -259,13 +261,16 @@
         LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
                    const Addr& addr, const uint32_t& size,
                    const Request::Flags& flags_,
-                   PacketDataPtr data = nullptr, uint64_t* res = nullptr)
+                   PacketDataPtr data = nullptr, uint64_t* res = nullptr,
+                   const std::vector<bool>& writeByteEnable =
+                       std::vector<bool>())
             : _state(State::NotIssued), _senderState(nullptr),
             numTranslatedFragments(0),
             numInTranslationFragments(0),
             _port(*port), _inst(inst), _data(data),
             _res(res), _addr(addr), _size(size),
             _flags(flags_),
+            _writeByteEnable(writeByteEnable),
             _numOutstandingPackets(0)
         {
             flags[(int)Flag::IsLoad] = isLoad;
@@ -369,9 +374,11 @@
          */
         void
         setVirt(int asid, Addr vaddr, unsigned size, Request::Flags flags_,
-                MasterID mid, Addr pc)
+                MasterID mid, Addr pc,
+                const std::vector<bool>& writeByteEnable = std::vector<bool>())
         {
             request()->setVirt(asid, vaddr, size, flags_, mid, pc);
+            request()->setWriteByteEnable(writeByteEnable);
         }

         void taskId(const uint32_t& v)
@@ -593,12 +600,16 @@
                           const Addr& addr, const uint32_t& size,
                           const Request::Flags& flags_,
                           PacketDataPtr data = nullptr,
-                          uint64_t* res = nullptr) :
-            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res)
+                          uint64_t* res = nullptr,
+                          const std::vector<bool>& writeByteEnable =
+                              std::vector<bool>()) :
+            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
+                       writeByteEnable)
         {
             LSQRequest::_requests.push_back(
                     std::make_shared<Request>(inst->getASID(), addr, size, flags_,
-                    inst->masterId(), inst->instAddr(), inst->contextId()));
+                    inst->masterId(), inst->instAddr(), inst->contextId(),
+                    writeByteEnable));
             LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
         }
         inline virtual ~SingleDataRequest() {}
@@ -655,8 +666,11 @@
                          const Addr& addr, const uint32_t& size,
                          const Request::Flags & flags_,
                          PacketDataPtr data = nullptr,
-                         uint64_t* res = nullptr) :
-            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res),
+                         uint64_t* res = nullptr,
+                         const std::vector<bool>& writeByteEnable =
+                             std::vector<bool>()) :
+            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
+                       writeByteEnable),
             numFragments(0),
             numReceivedPackets(0),
             mainReq(nullptr),
@@ -915,7 +929,7 @@

     Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
-                      uint64_t *res);
+                      uint64_t *res, const std::vector<bool>& writeByteEnable);

     /** The CPU pointer. */
     O3CPU *cpu;
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index eaa1b23..dd70f0f 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -681,29 +681,12 @@
     }
 }

-static Addr
-addrBlockOffset(Addr addr, unsigned int block_size)
-{
-    return addr & (block_size - 1);
-}
-
-static Addr
-addrBlockAlign(Addr addr, uint64_t block_size)
-{
-    return addr & ~(block_size - 1);
-}
-
-static bool
-transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
-{
-    return (addrBlockOffset(addr, block_size) + size) > block_size;
-}
-
 template<class Impl>
 Fault
 LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                        unsigned int size, Addr addr, Request::Flags flags,
-                       uint64_t *res)
+                       uint64_t *res,
+                       const std::vector<bool>& writeByteEnable)
 {
     ThreadID tid = cpu->contextToThread(inst->contextId());
     auto cacheLineSize = cpu->cacheLineSize();
@@ -716,10 +699,10 @@
     } else {
         if (needs_burst) {
             req = new SplitDataRequest(&thread.at(tid), inst, isLoad, addr,
-                    size, flags, data, res);
+                    size, flags, data, res, writeByteEnable);
         } else {
             req = new SingleDataRequest(&thread.at(tid), inst, isLoad, addr,
-                    size, flags, data, res);
+                    size, flags, data, res, writeByteEnable);
         }
         assert(req);
         inst->setRequest();
@@ -876,7 +859,10 @@

     mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
                 _size, _flags, _inst->masterId(),
-                _inst->instAddr(), _inst->contextId());
+                _inst->instAddr(), _inst->contextId(),
+                this->_writeByteEnable);
+
+    auto& writeByteEnable = mainReq->getWriteByteEnable();

     // Paddr is not used in mainReq. However, we will accumulate the flags
     // from the sub requests into mainReq by calling setFlags() in finish().
@@ -885,26 +871,56 @@
     mainReq->setPaddr(0);

     /* Get the pre-fix, possibly unaligned. */
-    _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
-                next_addr - base_addr, _flags, _inst->masterId(),
-                _inst->instAddr(), _inst->contextId()));
+    if (writeByteEnable.empty()) {
+        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+                    base_addr, next_addr - base_addr, _flags,
+                    _inst->masterId(), _inst->instAddr(), _inst->contextId()));
+    } else {
+        auto it_start = writeByteEnable.begin();
+        auto it_end = writeByteEnable.begin() + (next_addr - base_addr);
+        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+                    base_addr, next_addr - base_addr, _flags,
+                    _inst->masterId(), _inst->instAddr(), _inst->contextId(),
+                    std::vector<bool>(it_start, it_end)));
+    }
     size_so_far = next_addr - base_addr;

     /* We are block aligned now, reading whole blocks. */
     base_addr = next_addr;
     while (base_addr != final_addr) {
-        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
-                    base_addr, cacheLineSize, _flags, _inst->masterId(),
-                    _inst->instAddr(), _inst->contextId()));
+        if (writeByteEnable.empty()) {
+            _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+                        base_addr, cacheLineSize, _flags, _inst->masterId(),
+                        _inst->instAddr(), _inst->contextId()));
+        } else {
+            auto it_start = writeByteEnable.begin() + size_so_far;
+            auto it_end = writeByteEnable.begin() + size_so_far +
+                cacheLineSize;
+            _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+                        base_addr, cacheLineSize, _flags, _inst->masterId(),
+                        _inst->instAddr(), _inst->contextId(),
+                        std::vector<bool>(it_start, it_end)));
+        }
         size_so_far += cacheLineSize;
         base_addr += cacheLineSize;
     }

     /* Deal with the tail. */
     if (size_so_far < _size) {
-        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
-                    base_addr, _size - size_so_far, _flags, _inst->masterId(),
-                    _inst->instAddr(), _inst->contextId()));
+        if (writeByteEnable.empty()) {
+            _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+                        base_addr, _size - size_so_far, _flags,
+                        _inst->masterId(), _inst->instAddr(),
+                        _inst->contextId()));
+        } else {
+            auto it_start = writeByteEnable.begin() + size_so_far;
+            auto it_end = writeByteEnable.end();
+            _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+                        base_addr, _size - size_so_far, _flags,
+                        _inst->masterId(), _inst->instAddr(),
+                        _inst->contextId(),
+                        std::vector<bool>(it_start, it_end)));
+        }
     }

     /* Setup the requests and send them to translation. */
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index e91fafb..66b2f75b 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -49,6 +49,7 @@
 #include "base/output.hh"
 #include "config/the_isa.hh"
 #include "cpu/exetrace.hh"
+#include "cpu/utils.hh"
 #include "debug/Drain.hh"
 #include "debug/ExecFaulting.hh"
 #include "debug/SimpleCPU.hh"
@@ -344,25 +345,26 @@
     if (traceData)
         traceData->setMem(addr, size, flags);

-    //The size of the data we're trying to read.
-    int fullSize = size;
-
-    //The address of the second part of this access if it needs to be split
-    //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
-    if (secondAddr > addr)
-        size = secondAddr - addr;
-
     dcache_latency = 0;

     req->taskId(taskId());
+
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+
     while (1) {
-        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
+        frag_size = std::min(
+            cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
+            (Addr) size_left);
+        size_left -= frag_size;
+
+        req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),
+                     thread->pcState().instAddr());

         // translate to physical address
         Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
-                                                          BaseTLB::Read);
+                                                   BaseTLB::Read);

         // Now do the access.
         if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
@@ -392,27 +394,22 @@
             }
         }

-        //If we don't need to access a second cache line, stop now.
-        if (secondAddr <= addr)
-        {
+        // If we don't need to access further cache lines, stop now.
+        if (size_left == 0) {
             if (req->isLockedRMW() && fault == NoFault) {
                 assert(!locked);
                 locked = true;
             }
-
             return fault;
         }

         /*
-         * Set up for accessing the second cache line.
+         * Set up for accessing the next cache line.
          */
+        frag_addr += frag_size;

         //Move the pointer we're reading into to the correct location.
-        data += size;
-        //Adjust the size to get the remaining bytes.
-        size = addr + fullSize - secondAddr;
-        //And access the right address.
-        addr = secondAddr;
+        data += frag_size;
     }
 }

@@ -426,7 +423,8 @@

 Fault
 AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
-                          Request::Flags flags, uint64_t *res)
+                          Request::Flags flags, uint64_t *res,
+                          const std::vector<bool>& byteEnable)
 {
     SimpleExecContext& t_info = *threadInfo[curThread];
     SimpleThread* thread = t_info.thread;
@@ -445,32 +443,48 @@
     if (traceData)
         traceData->setMem(addr, size, flags);

-    //The size of the data we're trying to read.
-    int fullSize = size;
-
-    //The address of the second part of this access if it needs to be split
-    //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
-    if (secondAddr > addr)
-        size = secondAddr - addr;
-
     dcache_latency = 0;

     req->taskId(taskId());
+
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+    int curr_frag_id = 0;
+
     while (1) {

- req->setVirt(0, addr, size, flags, dataMasterId(),thread->pcState().instAddr());

+        frag_size = std::min(
+            cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
+            (Addr) size_left);
+        size_left -= frag_size;
+
+        req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),
+                     thread->pcState().instAddr());
+
+        if (!byteEnable.empty()) {
+            // Set up byte-enable mask for the current fragment
+            auto it_start = byteEnable.begin() + (size - (frag_size +
+                                                          size_left));
+            auto it_end = byteEnable.begin() + (size - size_left);
+
+            req->setWriteByteEnable(std::vector<bool>(it_start, it_end));
+        }

         // translate to physical address

- Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),BaseTLB::Write);

+        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                   BaseTLB::Write);

         // Now do the access.
         if (fault == NoFault) {
             bool do_access = true;  // flag to suppress cache access

             if (req->isLLSC()) {

- do_access = TheISA::handleLockedWrite(thread, req,dcachePort.cacheBlockMask);

+                assert(curr_frag_id == 0);
+                do_access =
+                    TheISA::handleLockedWrite(thread, req,
+                                              dcachePort.cacheBlockMask);
             } else if (req->isSwap()) {
+                assert(curr_frag_id == 0);
                 if (req->isCondSwap()) {
                     assert(res);
                     req->setExtraData(*res);
@@ -494,8 +508,8 @@
                 assert(!pkt.isError());

                 if (req->isSwap()) {
-                    assert(res);
-                    memcpy(res, pkt.getConstPtr<uint8_t>(), fullSize);
+                    assert(res && curr_frag_id == 0);
+                    memcpy(res, pkt.getConstPtr<uint8_t>(), size);
                 }
             }

@@ -506,14 +520,13 @@

//If there's a fault or we don't need to access a second cacheline,

         //stop now.
-        if (fault != NoFault || secondAddr <= addr)
+        if (fault != NoFault || size_left == 0)
         {
             if (req->isLockedRMW() && fault == NoFault) {
-                assert(locked);
+                assert(locked && curr_frag_id == 0);
                 locked = false;
             }

-
             if (fault != NoFault && req->isPrefetch()) {
                 return NoFault;
             } else {
@@ -522,15 +535,14 @@
         }

         /*
-         * Set up for accessing the second cache line.
+         * Set up for accessing the next cache line.
          */
+        frag_addr += frag_size;

         //Move the pointer we're reading into to the correct location.
-        data += size;
-        //Adjust the size to get the remaining bytes.
-        size = addr + fullSize - secondAddr;
-        //And access the right address.
-        addr = secondAddr;
+        data += frag_size;
+
+        curr_frag_id++;
     }
 }

diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index a5151aa..e76cd2b 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -201,7 +201,9 @@
                           Request::Flags flags) override;

     Fault writeMem(uint8_t *data, unsigned size,

- Addr addr, Request::Flags flags, uint64_t *res)override;

+                   Addr addr, Request::Flags flags, uint64_t *res,

+ const std::vector<bool>& byteEnable =std::vector<bool>())

+        override;

     void regProbePoints() override;

diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index e62fcf4..d717cac 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -149,7 +149,9 @@
                                   Request::Flags flags) = 0;

     virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr,
-                           Request::Flags flags, uint64_t* res) = 0;
+                           Request::Flags flags, uint64_t* res,
+                           const std::vector<bool>& byteEnable =
+                               std::vector<bool>()) = 0;

     void countInst();
     Counter totalInsts() const override;
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 814a7ee..e473d13 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -457,9 +457,12 @@
     }

     Fault writeMem(uint8_t *data, unsigned int size, Addr addr,
-                   Request::Flags flags, uint64_t *res) override
+                   Request::Flags flags, uint64_t *res,

+ const std::vector<bool>& byteEnable =std::vector<bool>())

+        override
     {
-        return cpu->writeMem(data, size, addr, flags, res);
+        assert(byteEnable.empty() || byteEnable.size() == size);
+        return cpu->writeMem(data, size, addr, flags, res, byteEnable);
     }

     /**
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index b5450cf..c03f601 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -498,7 +498,8 @@

 Fault
 TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
-                          Addr addr, Request::Flags flags, uint64_t *res)
+                          Addr addr, Request::Flags flags, uint64_t *res,
+                          const std::vector<bool>& byteEnable)
 {
     SimpleExecContext &t_info = *threadInfo[curThread];
     SimpleThread* thread = t_info.thread;
@@ -522,7 +523,7 @@

     RequestPtr req = std::make_shared<Request>(
         asid, addr, size, flags, dataMasterId(), pc,
-        thread->contextId());
+        thread->contextId(), byteEnable);

     req->taskId(taskId());

@@ -530,6 +531,10 @@
     assert(split_addr <= addr || split_addr - addr < block_size);

     _status = DTBWaitResponse;
+

+    // TODO: TimingSimpleCPU doesn't support arbitrarily long multi-line mem.
+    // accesses yet
+
     if (split_addr > addr) {
         RequestPtr req1, req2;
         assert(!req->isLLSC() && !req->isSwap());
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 0300d38..05111aa 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -289,7 +289,9 @@
                           Request::Flags flags) override;

     Fault writeMem(uint8_t *data, unsigned size,

- Addr addr, Request::Flags flags, uint64_t *res)override;

+                   Addr addr, Request::Flags flags, uint64_t *res,

+ const std::vector<bool>& byteEnable =std::vector<bool>())

+        override;

     void fetch();
     void sendFetch(const Fault &fault,
diff --git a/src/cpu/utils.hh b/src/cpu/utils.hh
new file mode 100644
index 0000000..f2cc089
--- /dev/null
+++ b/src/cpu/utils.hh
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#ifndef __CPU_UTILS_HH__
+#define __CPU_UTILS_HH__
+
+#include "base/types.hh"
+

+/** Returns the offset of `addr` into an aligned block of size `block_size` */

+inline Addr
+addrBlockOffset(Addr addr, Addr block_size)
+{
+    return addr & (block_size - 1);
+}
+

+/** Returns the address of the aligned block of size `block_size` closest to
+ *  `addr` */
+inline Addr
+addrBlockAlign(Addr addr, Addr block_size)
+{
+    return addr & ~(block_size - 1);
+}
+

+/** Returns true if the given [`addr` .. `addr`+`size`-1] transfer needs to be
+ *  fragmented across a block size of `block_size` */
+inline bool
+transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
+{
+    return (addrBlockOffset(addr, block_size) + size) > block_size;
+}
+
+#endif // __CPU_UTILS_HH__
diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
index 01817bb..e219005 100644
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -408,9 +408,18 @@
     } else if (pkt->isWrite()) {
         if (writeOK(pkt)) {
             if (pmemAddr) {

- memcpy(hostAddr, pkt->getConstPtr<uint8_t>(),pkt->getSize());

-                DPRINTF(MemoryAccess, "%s wrote %i bytes to address %x\n",
-                        __func__, pkt->getSize(), pkt->getAddr());
+                if (pkt->isMaskedWrite()) {
+                    for (int i = 0; i < pkt->getSize(); i++) {
+                        if (pkt->req->getWriteByteEnable()[i]) {
+                            hostAddr[i] = pkt->getConstPtr<uint8_t>()[i];
+                        }
+                    }
+                } else {
+                    memcpy(hostAddr, pkt->getConstPtr<uint8_t>(),
+                           pkt->getSize());

+                    DPRINTF(MemoryAccess, "%s wrote %i bytes to address %x\n",

+                            __func__, pkt->getSize(), pkt->getAddr());
+                }
             }
             assert(!pkt->req->isInstFetch());
             TRACE_PACKET("Write");
@@ -440,8 +449,17 @@
         TRACE_PACKET("Read");
         pkt->makeResponse();
     } else if (pkt->isWrite()) {
-        if (pmemAddr)
-            memcpy(hostAddr, pkt->getConstPtr<uint8_t>(), pkt->getSize());
+        if (pmemAddr) {
+            if (pkt->isMaskedWrite()) {
+                for (int i = 0; i < pkt->getSize(); i++) {
+                    if (pkt->req->getWriteByteEnable()[i]) {
+                        hostAddr[i] = pkt->getConstPtr<uint8_t>()[i];
+                    }
+                }
+            } else {

+ memcpy(hostAddr, pkt->getConstPtr<uint8_t>(),pkt->getSize());

+            }
+        }
         TRACE_PACKET("Write");
         pkt->makeResponse();
     } else if (pkt->isPrint()) {
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 116b543..ef12abc 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -305,7 +305,8 @@
 {
     // Cache line clearing instructions
     if (doFastWrites && (pkt->cmd == MemCmd::WriteReq) &&
-        (pkt->getSize() == blkSize) && (pkt->getOffset(blkSize) == 0)) {
+        (pkt->getSize() == blkSize) && (pkt->getOffset(blkSize) == 0) &&
+        !pkt->isMaskedWrite()) {
         pkt->cmd = MemCmd::WriteLineReq;
         DPRINTF(Cache, "packet promoted from Write to WriteLineReq\n");
     }
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index d82ca4f..14cc450 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -950,7 +950,6 @@
         flags.set(VALID_SIZE);
     }

-
   public:
     /**
      * @{
@@ -1023,6 +1022,7 @@
     getPtr()
     {
         assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA));
+        assert(!isWrite() || req->getWriteByteEnable().empty());
         return (T*)data;
     }

@@ -1129,7 +1129,17 @@
     void
     writeData(uint8_t *p) const
     {
-        std::memcpy(p, getConstPtr<uint8_t>(), getSize());
+        if (req->getWriteByteEnable().empty()) {
+            std::memcpy(p, getConstPtr<uint8_t>(), getSize());
+        } else {
+            // Write only the enabled bytes
+            for (int i = 0; i < getSize(); i++) {
+                if (req->getWriteByteEnable()[i]) {
+                    p[i] = *(getConstPtr<uint8_t>() + i);
+                }
+                // Disabled bytes stay untouched
+            }
+        }
     }

     /**
@@ -1194,6 +1204,15 @@
     bool
     trySatisfyFunctional(PacketPtr other)
     {

+ if (other->isWrite() && !other->req->getWriteByteEnable().empty()){

+            if (getAddr() <= (other->getAddr() + other->getSize() - 1) &&
+                other->getAddr() <= (getAddr() + getSize() - 1)) {
+                warn("Trying to check against a masked write, skipping."
+                     " (addr: 0x%x, other addr: 0x%x)", getAddr(),
+                     other->getAddr());
+            }
+            return false;
+        }
         // all packets that are carrying a payload should have a valid
         // data pointer

return trySatisfyFunctional(other, other->getAddr(),other->isSecure(),

@@ -1222,6 +1241,12 @@
         return cmd == MemCmd::CleanEvict || cmd == MemCmd::WritebackClean;
     }

+    bool
+    isMaskedWrite() const
+    {
+        return !req->getWriteByteEnable().empty();
+    }
+
     /**
      * Check a functional request against a memory value represented
      * by a base/size pair and an associated data array. If the
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 3df29aa..d92c584 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -320,6 +320,9 @@
      */
     unsigned _size;

+    /** Byte-enable mask for writes. */
+    std::vector<bool> _writeByteEnable;
+
     /** The requestor ID which is unique in the system for all ports
      * that are capable of issuing a transaction
      */
@@ -454,7 +457,8 @@
     }

     Request(uint64_t asid, Addr vaddr, unsigned size, Flags flags,
-            MasterID mid, Addr pc, ContextID cid)
+            MasterID mid, Addr pc, ContextID cid,
+            const std::vector<bool>& writeByteEnable = std::vector<bool>())
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _pc(0),
@@ -462,6 +466,7 @@
           accessDelta(0), depth(0)
     {
         setVirt(asid, vaddr, size, flags, mid, pc);
+        setWriteByteEnable(writeByteEnable);
         setContext(cid);
     }

@@ -567,6 +572,9 @@
      * Generate two requests as if this request had been split into two
      * pieces. The original request can't have been translated already.
      */
+    // TODO: this function is still required by TimingSimpleCPU - should be
+    // removed once TimingSimpleCPU will support arbitrarily long multi-line
+    // mem. accesses
     void splitOnVaddr(Addr split_addr, RequestPtr &req1, RequestPtr &req2)
     {
         assert(privateFlags.isSet(VALID_VADDR));
@@ -577,6 +585,14 @@
         req1->_size = split_addr - _vaddr;
         req2->_vaddr = split_addr;
         req2->_size = _size - req1->_size;
+        if (!_writeByteEnable.empty()) {
+            req1->_writeByteEnable = std::vector<bool>(
+                _writeByteEnable.begin(),
+                _writeByteEnable.begin() + req1->_size);
+            req2->_writeByteEnable = std::vector<bool>(
+                _writeByteEnable.begin() + req1->_size,
+                _writeByteEnable.end());
+        }
     }

     /**
@@ -628,6 +644,19 @@
         return _size;
     }

+    const std::vector<bool>&
+    getWriteByteEnable() const
+    {
+        return _writeByteEnable;
+    }
+
+    void
+    setWriteByteEnable(const std::vector<bool>& wbe)
+    {
+        assert(wbe.empty() || wbe.size() == _size);
+        _writeByteEnable = wbe;
+    }
+
     /** Accessor for time. */
     Tick
     time() const

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/13518

To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: Ibad33541c258ad72925c0b1d5abc3e5e8bf92d92
Gerrit-Change-Number: 13518
Gerrit-PatchSet: 1
Gerrit-Owner: Giacomo Gabrielli <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev

[gem5-dev] Change in gem5/gem5[master]: mem: Add support for partial stores and wide memory accesses

Reply via email to