[gem5-dev] Change in gem5/gem5[master]: mem, cpu: Add support for masked reads and clean up masked writes

Gabor Dozsa (Gerrit) Thu, 28 Feb 2019 06:28:53 -0800

Gabor Dozsa has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/16868 )

Change subject: mem,cpu: Add support for masked reads and clean up masked writes

......................................................................

mem,cpu: Add support for masked reads and clean up masked writes

This patch adds support for masked reads for the various cpu models.
It also renames writeByteEnable to byteEnable in memory requests as
strobes can be used by both reads and writes.

Note that read strobes are ignored for normal memory and cache
transactions.

Change-Id: I9c91fdbe192af56e95a98a20490fa386a2966f24
Signed-off-by: Gabor Dozsa <[email protected]>
---
M src/cpu/base_dyn_inst.hh
M src/cpu/checker/cpu.cc
M src/cpu/checker/cpu.hh
M src/cpu/exec_context.hh
M src/cpu/minor/exec_context.hh
M src/cpu/minor/execute.cc
M src/cpu/minor/lsq.cc
M src/cpu/minor/lsq.hh
M src/cpu/o3/lsq.hh
M src/cpu/o3/lsq_impl.hh
M src/cpu/simple/atomic.cc
M src/cpu/simple/atomic.hh
M src/cpu/simple/base.hh
M src/cpu/simple/exec_context.hh
M src/cpu/simple/timing.cc
M src/cpu/simple/timing.hh
M src/cpu/utils.hh
M src/mem/abstract_mem.cc
M src/mem/packet.hh
M src/mem/request.hh
20 files changed, 403 insertions(+), 228 deletions(-)



diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index ec67b6f..261c007 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -299,7 +299,8 @@
         cpu->demapPage(vaddr, asn);
     }

-    Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags);
+    Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>());

     Fault writeMem(uint8_t *data, unsigned size, Addr addr,
                    Request::Flags flags, uint64_t *res,
@@ -916,11 +917,12 @@
 template<class Impl>
 Fault
 BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
-                                   Request::Flags flags)
+                                   Request::Flags flags,
+                                   const std::vector<bool>& byteEnable)
 {
     return cpu->pushRequest(
             dynamic_cast<typename DynInstPtr::PtrType>(this),
-            /* ld */ true, nullptr, size, addr, flags, nullptr);
+            /* ld */ true, nullptr, size, addr, flags, nullptr, byteEnable);
 }

 template<class Impl>
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index 99db59b..4480264 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011,2013,2017 ARM Limited
+ * Copyright (c) 2011,2013,2017-2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -143,7 +143,8 @@

 Fault
 CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
-                    Request::Flags flags)
+                    Request::Flags flags,
+                    const std::vector<bool>& byteEnable)
 {
     Fault fault = NoFault;
     bool checked_flags = false;
@@ -153,22 +154,42 @@
     Addr frag_addr = addr;
     int frag_size = 0;
     int size_left = size;
+    bool predicate;

     // Need to account for multiple accesses like the Atomic and TimingSimple
     while (1) {
+        predicate = true;
         frag_size = std::min(
             cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
             (Addr) size_left);
         size_left -= frag_size;

-        auto mem_req = std::make_shared<Request>(
-            0, frag_addr, frag_size, flags, masterId,
-            thread->pcState().instAddr(), tc->contextId());
+        std::shared_ptr<Request> mem_req;

+        if (!byteEnable.empty()) {
+            // Set up byte-enable mask for the current fragment
+            auto it_start = byteEnable.cbegin() + (size - (frag_size +
+                                                          size_left));
+            auto it_end = byteEnable.cbegin() + (size - size_left);
+            if (isAnyActiveElement(it_start, it_end)) {
+                mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+                        flags, masterId, thread->pcState().instAddr(),
+                        tc->contextId(),
+                        std::vector<bool>(it_start, it_end));
+            } else {
+                predicate = false;
+            }
+        } else {
+            mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+                    flags, masterId, thread->pcState().instAddr(),
+                    tc->contextId());
+        }
         // translate to physical address
-        fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Read);
+        if (predicate) {
+            fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Read);
+        }

-        if (!checked_flags && fault == NoFault && unverifiedReq) {
+        if (predicate && !checked_flags && fault == NoFault && unverifiedReq) {
             flags_match = checkFlags(unverifiedReq, mem_req->getVaddr(),
                                      mem_req->getPaddr(), mem_req->getFlags());
             pAddr = mem_req->getPaddr();
@@ -176,7 +197,7 @@
         }

         // Now do the access
-        if (fault == NoFault &&
+        if (predicate && fault == NoFault &&
             !mem_req->getFlags().isSet(Request::NO_ACCESS)) {
             PacketPtr pkt = Packet::createRead(mem_req);

@@ -238,30 +259,43 @@
     Addr frag_addr = addr;
     int frag_size = 0;
     int size_left = size;
+    bool predicate;

     // Need to account for a multiple access like Atomic and Timing CPUs
     while (1) {
+        predicate = true;
         frag_size = std::min(
             cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
             (Addr) size_left);
         size_left -= frag_size;

-        auto mem_req = std::make_shared<Request>(
-            0, frag_addr, frag_size, flags, masterId,
-            thread->pcState().instAddr(), tc->contextId());
+        std::shared_ptr<Request> mem_req;

         if (!byteEnable.empty()) {
             // Set up byte-enable mask for the current fragment
-            auto it_start = byteEnable.begin() + (size - (frag_size +
+            auto it_start = byteEnable.cbegin() + (size - (frag_size +
                                                           size_left));
-            auto it_end = byteEnable.begin() + (size - size_left);
-            mem_req->setWriteByteEnable(std::vector<bool>(it_start, it_end));
+            auto it_end = byteEnable.cbegin() + (size - size_left);
+            if (isAnyActiveElement(it_start, it_end)) {
+                mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+                        flags, masterId, thread->pcState().instAddr(),
+                        tc->contextId(),
+                        std::vector<bool>(it_start, it_end));
+            } else {
+                predicate = false;
+            }
+        } else {
+            mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+                        flags, masterId, thread->pcState().instAddr(),
+                        tc->contextId());
         }

         // translate to physical address
-        fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Write);
+        if (predicate) {
+            fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Write);
+        }

-        if (!checked_flags && fault == NoFault && unverifiedReq) {
+        if (predicate && !checked_flags && fault == NoFault && unverifiedReq) {
            flags_match = checkFlags(unverifiedReq, mem_req->getVaddr(),
                                     mem_req->getPaddr(), mem_req->getFlags());
            pAddr = mem_req->getPaddr();
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index 4a40f38..3ad3bc5 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -544,7 +544,9 @@
     }

     Fault readMem(Addr addr, uint8_t *data, unsigned size,
-                  Request::Flags flags) override;
+                  Request::Flags flags,
+                  const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
     Fault writeMem(uint8_t *data, unsigned size, Addr addr,
                    Request::Flags flags, uint64_t *res,
                    const std::vector<bool>& byteEnable = std::vector<bool>())
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index d585096..75cc4e7 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -247,7 +247,8 @@
      * should never be called).
      */
     virtual Fault readMem(Addr addr, uint8_t *data, unsigned int size,
-                          Request::Flags flags)
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>())
     {
         panic("ExecContext::readMem() should be overridden\n");
     }
@@ -260,7 +261,8 @@
      * should never be called).
      */
     virtual Fault initiateMemRead(Addr addr, unsigned int size,
-                                  Request::Flags flags)
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>())
     {
         panic("ExecContext::initiateMemRead() should be overridden\n");
     }
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index a151c6a..3560a7e 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -97,6 +97,7 @@
         DPRINTF(MinorExecute, "ExecContext setting PC: %s\n", inst->pc);
         pcState(inst->pc);
         setPredicate(true);
+        setMemAccPredicate(true);
         thread.setIntReg(TheISA::ZeroReg, 0);
 #if THE_ISA == ALPHA_ISA
         thread.setFloatReg(TheISA::ZeroReg, 0.0);
@@ -105,10 +106,13 @@

     Fault
     initiateMemRead(Addr addr, unsigned int size,
-                    Request::Flags flags) override
+                    Request::Flags flags,
+                    const std::vector<bool>& byteEnable = std::vector<bool>())
+        override
     {
         execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
-            size, addr, flags, NULL);
+                                     size, addr, flags, nullptr,
+                                     byteEnable);
         return NoFault;
     }

@@ -120,7 +124,7 @@
     {
         assert(byteEnable.empty() || byteEnable.size() == size);
         execute.getLSQ().pushRequest(inst, false /* store */, data,
-            size, addr, flags, res, byteEnable);
+                                     size, addr, flags, res, byteEnable);
         return NoFault;
     }

diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 7b76ca2..af244a4 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014,2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -473,18 +473,25 @@
         } else {
             /* Only set this if the instruction passed its
              * predicate */
+            if (!context.readMemAccPredicate()) {
+                DPRINTF(MinorMem, "No memory access for inst: %s\n", *inst);
+
+                inst->staticInst->completeAcc(nullptr, &context,
+                                              inst->traceData);
+                assert(context.readPredicate());
+            }
             passed_predicate = context.readPredicate();

             /* Set predicate in tracing */
             if (inst->traceData)
                 inst->traceData->setPredicate(passed_predicate);

-            /* If the instruction didn't pass its predicate (and so will not
-             *  progress from here)  Try to branch to correct and branch
-             *  mis-prediction. */
-            if (!passed_predicate) {
-                /* Leave it up to commit to handle the fault */
+            if (!inst->inLSQ) {
+                /* The instruction does not make it into the LSQ yet (due
+                 * to predication). We still need a matching request for
+                 * the commit */
                 lsq.pushFailedRequest(inst);
+                inst->inLSQ = true;
             }
         }

@@ -913,14 +920,13 @@
             predicate_passed, fault);

         if (completed_mem_inst && fault != NoFault) {
+            assert(!inst->inLSQ);
             if (early_memory_issue) {
                 DPRINTF(MinorExecute, "Fault in early executing inst: %s\n",
                     fault->name());
                 /* Don't execute the fault, just stall the instruction
                  *  until it gets to the head of inFlightInsts */
                 inst->canEarlyIssue = false;
-                /* Not completed as we'll come here again to pick up
-                *  the fault when we get to the end of the FU */
                 completed_inst = false;
             } else {
                 DPRINTF(MinorExecute, "Fault in execute: %s\n",
@@ -930,10 +936,11 @@
                 tryToBranch(inst, fault, branch);
                 completed_inst = true;
             }
+            completed_mem_issue = false;
         } else {
             completed_inst = completed_mem_inst;
+            completed_mem_issue = completed_inst;
         }
-        completed_mem_issue = completed_inst;
     } else if (inst->isInst() && inst->staticInst->isMemBarrier() &&
         !lsq.canPushIntoStoreBuffer())
     {
@@ -1309,8 +1316,8 @@

         /* Mark the mem inst as being in the LSQ */
         if (issued_mem_ref) {
+            assert(inst->inLSQ);
             inst->fuIndex = 0;
-            inst->inLSQ = true;
         }

         /* Pop issued (to LSQ) and discarded mem refs from the inFUMemInsts
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index 7c7c74d..255bd29 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014,2017 ARM Limited
+ * Copyright (c) 2013-2014,2017-2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -57,7 +57,7 @@

 LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
     PacketDataPtr data_, uint64_t *res_,
-    const std::vector<bool>& writeByteEnable_) :
+    const std::vector<bool>& byteEnable_) :
     SenderState(),
     port(port_),
     inst(inst_),
@@ -67,7 +67,7 @@
     request(),
     fault(NoFault),
     res(res_),
-    writeByteEnable(writeByteEnable_),
+    byteEnable(byteEnable_),
     skipped(false),
     issuedToMemory(false),
     state(NotIssued)
@@ -75,6 +75,13 @@
     request = std::make_shared<Request>();
 }

+void
+LSQ::LSQRequest::disableMemAccess()
+{
+    port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false);
+    DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst);
+}
+
 LSQ::AddrRangeCoverage
 LSQ::LSQRequest::containsAddrRangeOf(
     Addr req1_addr, unsigned int req1_size,
@@ -243,16 +250,23 @@
     ThreadContext *thread = port.cpu.getContext(
         inst->id.threadId);

-    port.numAccessesInDTLB++;
+    const auto &byteEnable = request->getByteEnable();
+    if (byteEnable.size() == 0 ||
+        isAnyActiveElement(byteEnable.cbegin(), byteEnable.cend())) {
+        port.numAccessesInDTLB++;

-    setState(LSQ::LSQRequest::InTranslation);
+        setState(LSQ::LSQRequest::InTranslation);

-    DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
-    /* Submit the translation request.  The response will come through
-     *  finish/markDelayed on the LSQRequest as it bears the Translation
-     *  interface */
-    thread->getDTBPtr()->translateTiming(
-        request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
+        DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
+        /* Submit the translation request.  The response will come through
+         *  finish/markDelayed on the LSQRequest as it bears the Translation
+         *  interface */
+        thread->getDTBPtr()->translateTiming(
+            request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
+    } else {
+        disableMemAccess();
+        setState(LSQ::LSQRequest::Complete);
+    }
 }

 void
@@ -310,8 +324,8 @@

 LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
     bool isLoad_, PacketDataPtr data_, uint64_t *res_,
-    const std::vector<bool>& writeByteEnable_) :
-    LSQRequest(port_, inst_, isLoad_, data_, res_, writeByteEnable_),
+    const std::vector<bool>& byteEnable_) :
+    LSQRequest(port_, inst_, isLoad_, data_, res_, byteEnable_),
     translationEvent([this]{ sendNextFragmentToTranslation(); },
                      "translationEvent"),
     numFragments(0),
@@ -391,7 +405,8 @@
     /* Just past the last address in the request */
     Addr end_addr = base_addr + whole_size;

-    auto& writeByteEnable = request->getWriteByteEnable();
+    auto& byteEnable = request->getByteEnable();
+    unsigned int num_disabled_fragments = 0;

     for (unsigned int fragment_index = 0; fragment_index < numFragments;
          fragment_index++)
@@ -413,41 +428,58 @@
         }

         RequestPtr fragment = std::make_shared<Request>();
+        bool disabled_fragment = false;

         fragment->setContext(request->contextId());
-        fragment->setVirt(0 /* asid */,
-            fragment_addr, fragment_size, request->getFlags(),
-            request->masterId(),
-            request->getPC());
-
-        if (!writeByteEnable.empty()) {
+        if (byteEnable.empty()) {
+            fragment->setVirt(0 /* asid */,
+                fragment_addr, fragment_size, request->getFlags(),
+                request->masterId(),
+                request->getPC());
+        } else {
             // Set up byte-enable mask for the current fragment
-            auto it_start = writeByteEnable.begin() +
+            auto it_start = byteEnable.begin() +
                 (fragment_addr - base_addr);
-            auto it_end = writeByteEnable.begin() +
+            auto it_end = byteEnable.begin() +
                 (fragment_addr - base_addr) + fragment_size;
-            fragment->setWriteByteEnable(std::vector<bool>(it_start, it_end));
+            if (isAnyActiveElement(it_start, it_end)) {
+                fragment->setVirt(0 /* asid */,
+                    fragment_addr, fragment_size, request->getFlags(),
+                    request->masterId(),
+                    request->getPC());
+                fragment->setByteEnable(std::vector<bool>(it_start, it_end));
+            } else {
+                disabled_fragment = true;
+            }
         }

-        DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d"
-            " (whole request addr: 0x%x size: %d) %s\n",
-            fragment_addr, fragment_size, base_addr, whole_size,
-            (is_last_fragment ? "last fragment" : ""));
+        if (!disabled_fragment) {
+            DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x"
+                " size: %d (whole request addr: 0x%x size: %d) %s\n",
+                fragment_addr, fragment_size, base_addr, whole_size,
+                (is_last_fragment ? "last fragment" : ""));
+
+            fragmentRequests.push_back(fragment);
+        } else {
+            num_disabled_fragments++;
+        }

         fragment_addr += fragment_size;
-
-        fragmentRequests.push_back(fragment);
     }
+    assert(numFragments >= num_disabled_fragments);
+    numFragments -= num_disabled_fragments;
 }

 void
 LSQ::SplitDataRequest::makeFragmentPackets()
 {
+    assert(numTranslatedFragments > 0);
     Addr base_addr = request->getVaddr();

     DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);

-    for (unsigned int fragment_index = 0; fragment_index < numFragments;
+    for (unsigned int fragment_index = 0;
+         fragment_index < numTranslatedFragments;
          fragment_index++)
     {
         RequestPtr fragment = fragmentRequests[fragment_index];
@@ -475,7 +507,7 @@

         PacketPtr fragment_packet =
             makePacketForRequest(fragment, isLoad, this, request_data,
-                                 fragment->getWriteByteEnable());
+                                 fragment->getByteEnable());

         fragmentPackets.push_back(fragment_packet);
         /* Accumulate flags in parent request */
@@ -492,28 +524,32 @@
 void
 LSQ::SplitDataRequest::startAddrTranslation()
 {
-    setState(LSQ::LSQRequest::InTranslation);
-
     makeFragmentRequests();

-    numInTranslationFragments = 0;
-    numTranslatedFragments = 0;
+    if (numFragments > 0) {
+        setState(LSQ::LSQRequest::InTranslation);
+        numInTranslationFragments = 0;
+        numTranslatedFragments = 0;

-    /* @todo, just do these in sequence for now with
-     * a loop of:
-     * do {
-     *  sendNextFragmentToTranslation ; translateTiming ; finish
-     * } while (numTranslatedFragments != numFragments);
-     */
+        /* @todo, just do these in sequence for now with
+         * a loop of:
+         * do {
+         *  sendNextFragmentToTranslation ; translateTiming ; finish
+         * } while (numTranslatedFragments != numFragments);
+         */

-    /* Do first translation */
-    sendNextFragmentToTranslation();
+        /* Do first translation */
+        sendNextFragmentToTranslation();
+    } else {
+        disableMemAccess();
+        setState(LSQ::LSQRequest::Complete);
+    }
 }

 PacketPtr
 LSQ::SplitDataRequest::getHeadPacket()
 {
-    assert(numIssuedFragments < numFragments);
+    assert(numIssuedFragments < numTranslatedFragments);

     return fragmentPackets[numIssuedFragments];
 }
@@ -521,7 +557,7 @@
 void
 LSQ::SplitDataRequest::stepToNextPacket()
 {
-    assert(numIssuedFragments < numFragments);
+    assert(numIssuedFragments < numTranslatedFragments);

     numIssuedFragments++;
 }
@@ -529,14 +565,13 @@
 void
 LSQ::SplitDataRequest::retireResponse(PacketPtr response)
 {
-    assert(numRetiredFragments < numFragments);
+    assert(numRetiredFragments < numTranslatedFragments);

     DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
-        " offset: 0x%x (retired fragment num: %d) %s\n",
+        " offset: 0x%x (retired fragment num: %d)\n",
         response->req->getVaddr(), response->req->getSize(),
         request->getVaddr() - response->req->getVaddr(),
-        numRetiredFragments,
-        (fault == NoFault ? "" : fault->name()));
+        numRetiredFragments);

     numRetiredFragments++;

@@ -575,7 +610,7 @@
             packet->makeResponse();
     }

-    if (numRetiredFragments == numFragments)
+    if (numRetiredFragments == numTranslatedFragments)
         setState(Complete);

     if (!skipped && isComplete()) {
@@ -1477,7 +1512,7 @@
 void
 LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                  unsigned int size, Addr addr, Request::Flags flags,
-                 uint64_t *res, const std::vector<bool>& writeByteEnable)
+                 uint64_t *res, const std::vector<bool>& byteEnable)
 {
     bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
     LSQRequestPtr request;
@@ -1505,10 +1540,10 @@

     if (needs_burst) {
         request = new SplitDataRequest(
-            *this, inst, isLoad, request_data, res, writeByteEnable);
+            *this, inst, isLoad, request_data, res, byteEnable);
     } else {
         request = new SingleDataRequest(
-            *this, inst, isLoad, request_data, res, writeByteEnable);
+            *this, inst, isLoad, request_data, res, byteEnable);
     }

     if (inst->traceData)
@@ -1520,8 +1555,8 @@
         addr, size, flags, cpu.dataMasterId(),
         /* I've no idea why we need the PC, but give it */
         inst->pc.instAddr());
-    if (!writeByteEnable.empty()) {
-        request->request->setWriteByteEnable(writeByteEnable);
+    if (!byteEnable.empty()) {
+        request->request->setByteEnable(byteEnable);
     }

     requests.push(request);
@@ -1561,9 +1596,9 @@
 PacketPtr
 makePacketForRequest(const RequestPtr &request, bool isLoad,
     Packet::SenderState *sender_state, PacketDataPtr data,
-    const std::vector<bool>& writeByteEnable)
+    const std::vector<bool>& byteEnable)
 {
-    assert(!isLoad || writeByteEnable.empty());
+    assert(!isLoad || byteEnable.empty());
     PacketPtr ret = isLoad ? Packet::createRead(request)
                            : Packet::createWrite(request);

@@ -1607,7 +1642,7 @@
     }

     packet = makePacketForRequest(request, isLoad, this, data,
-                                  writeByteEnable);
+                                  byteEnable);
     /* Null the ret data so we know not to deallocate it when the
      * ret is destroyed.  The data now belongs to the ret and
      * the ret is responsible for its destruction */
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index 66a2dd1..15c0e8e 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -152,7 +152,7 @@
         uint64_t *res;

         /** Byte-enable mask for writes */
-        std::vector<bool> writeByteEnable;
+        std::vector<bool> byteEnable;

         /** Was skipped.  Set to indicate any reason (faulted, bad
          *  stream sequence number, in a fault shadow) that this
@@ -191,10 +191,12 @@
         /** BaseTLB::Translation interface */
         void markDelayed() { }

+        void disableMemAccess();
+
       public:
         LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
             PacketDataPtr data_ = NULL, uint64_t *res_ = NULL,
-            const std::vector<bool>& writeByteEnable_ = std::vector<bool>());
+            const std::vector<bool>& byteEnable_ = std::vector<bool>());

         virtual ~LSQRequest();

@@ -371,8 +373,8 @@
       public:
         SingleDataRequest(LSQ &port_, MinorDynInstPtr inst_,
             bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL,
-            const std::vector<bool>& writeByteEnable_ = std::vector<bool>()) :
-            LSQRequest(port_, inst_, isLoad_, data_, res_, writeByteEnable_),
+            const std::vector<bool>& byteEnable_ = std::vector<bool>()) :
+            LSQRequest(port_, inst_, isLoad_, data_, res_, byteEnable_),
             packetInFlight(false),
             packetSent(false)
         { }
@@ -418,7 +420,7 @@
         SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
             bool isLoad_, PacketDataPtr data_ = NULL,
             uint64_t *res_ = NULL,
-            const std::vector<bool>& writeByteEnable_ = std::vector<bool>());
+            const std::vector<bool>& byteEnable_ = std::vector<bool>());

         ~SplitDataRequest();

@@ -447,7 +449,8 @@
         { return numIssuedFragments != numRetiredFragments; }

         /** Have we stepped past the end of fragmentPackets? */
-        bool sentAllPackets() { return numIssuedFragments == numFragments; }
+        bool sentAllPackets()
+        { return numIssuedFragments == numTranslatedFragments; }

         /** For loads, paste the response data into the main
          *  response packet */
@@ -705,10 +708,9 @@
     /** Single interface for readMem/writeMem to issue requests into
      *  the LSQ */
     void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
-                     unsigned int size, Addr addr, Request::Flags flags,
-                     uint64_t *res,
-                     const std::vector<bool>& writeByteEnable =
-                         std::vector<bool>());
+            unsigned int size, Addr addr, Request::Flags flags,
+            uint64_t *res,
+            const std::vector<bool>& byteEnable = std::vector<bool>());

     /** Push a predicate failed-representing request into the queues just
      *  to maintain commit order */
@@ -730,7 +732,7 @@
  *  pushed into the packet as senderState */
 PacketPtr makePacketForRequest(const RequestPtr &request, bool isLoad,
     Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL,
-    const std::vector<bool>& writeByteEnable = std::vector<bool>());
+    const std::vector<bool>& byteEnable = std::vector<bool>());
 }

 #endif /* __CPU_MINOR_NEW_LSQ_HH__ */
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index fd2fce6..f3fdfc1 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -244,7 +244,7 @@
         const Addr _addr;
         const uint32_t _size;
         const Request::Flags _flags;
-        std::vector<bool> _writeByteEnable;
+        std::vector<bool> _byteEnable;
         uint32_t _numOutstandingPackets;
       protected:
         LSQUnit* lsqUnit() { return &_port; }
@@ -262,15 +262,15 @@
                    const Addr& addr, const uint32_t& size,
                    const Request::Flags& flags_,
                    PacketDataPtr data = nullptr, uint64_t* res = nullptr,
-                   const std::vector<bool>& writeByteEnable =
-                       std::vector<bool>())
+                   const std::vector<bool>& byteEnable =
+                   std::vector<bool>())
             : _state(State::NotIssued), _senderState(nullptr),
             numTranslatedFragments(0),
             numInTranslationFragments(0),
             _port(*port), _inst(inst), _data(data),
             _res(res), _addr(addr), _size(size),
             _flags(flags_),
-            _writeByteEnable(writeByteEnable),
+            _byteEnable(byteEnable),
             _numOutstandingPackets(0)
         {
             flags[(int)Flag::IsLoad] = isLoad;
@@ -375,10 +375,10 @@
         void
         setVirt(int asid, Addr vaddr, unsigned size, Request::Flags flags_,
                 MasterID mid, Addr pc,
-                const std::vector<bool>& writeByteEnable = std::vector<bool>())
+                const std::vector<bool>& byteEnable = std::vector<bool>())
         {
             request()->setVirt(asid, vaddr, size, flags_, mid, pc);
-            request()->setWriteByteEnable(writeByteEnable);
+            request()->setByteEnable(byteEnable);
         }

         void taskId(const uint32_t& v)
@@ -578,11 +578,17 @@
          * declaration of the names in the parent class. */
         using Flag = typename LSQRequest::Flag;
         using State = typename LSQRequest::State;
+        using LSQRequest::_addr;
         using LSQRequest::_fault;
+        using LSQRequest::_flags;
+        using LSQRequest::_size;
+        using LSQRequest::_byteEnable;
+        using LSQRequest::_requests;
         using LSQRequest::_inst;
         using LSQRequest::_packets;
         using LSQRequest::_port;
         using LSQRequest::_res;
+        using LSQRequest::_taskId;
         using LSQRequest::_senderState;
         using LSQRequest::_state;
         using LSQRequest::flags;
@@ -601,17 +607,11 @@
                           const Request::Flags& flags_,
                           PacketDataPtr data = nullptr,
                           uint64_t* res = nullptr,
-                          const std::vector<bool>& writeByteEnable =
-                              std::vector<bool>()) :
+                          const std::vector<bool>& byteEnable =
+                          std::vector<bool>()) :
             LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
-                       writeByteEnable)
-        {
-            LSQRequest::_requests.push_back(
-                std::make_shared<Request>(inst->getASID(), addr, size, flags_,
-                    inst->masterId(), inst->instAddr(), inst->contextId(),
-                    writeByteEnable));
-            LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
-        }
+                       byteEnable) {}
+
         inline virtual ~SingleDataRequest() {}
         virtual void initiateTranslation();
         virtual void finish(const Fault &fault, const RequestPtr &req,
@@ -640,6 +640,7 @@
         using LSQRequest::_port;
         using LSQRequest::_requests;
         using LSQRequest::_res;
+        using LSQRequest::_byteEnable;
         using LSQRequest::_senderState;
         using LSQRequest::_size;
         using LSQRequest::_state;
@@ -660,17 +661,15 @@
         RequestPtr mainReq;
         PacketPtr _mainPacket;

-
       public:

         SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,

                          const Addr& addr, const uint32_t& size,
                          const Request::Flags & flags_,
-                         PacketDataPtr data = nullptr,
-                         uint64_t* res = nullptr,
-                         const std::vector<bool>& writeByteEnable =
-                             std::vector<bool>()) :

+                         PacketDataPtr data = nullptr, uint64_t* res = nullptr,

+                         const std::vector<bool>& byteEnable =
+                         std::vector<bool>()) :
             LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
-                       writeByteEnable),
+                       byteEnable),
             numFragments(0),
             numReceivedPackets(0),
             mainReq(nullptr),
@@ -929,7 +928,7 @@

     Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,

-                      uint64_t *res, const std::vector<bool>& writeByteEnable);

+                      uint64_t *res, const std::vector<bool>& byteEnable);

     /** The CPU pointer. */
     O3CPU *cpu;
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index dd70f0f..11d2dc4 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -685,8 +685,7 @@
 Fault
 LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                        unsigned int size, Addr addr, Request::Flags flags,
-                       uint64_t *res,
-                       const std::vector<bool>& writeByteEnable)
+                       uint64_t *res, const std::vector<bool>& byteEnable)
 {
     ThreadID tid = cpu->contextToThread(inst->contextId());
     auto cacheLineSize = cpu->cacheLineSize();
@@ -699,10 +698,10 @@
     } else {
         if (needs_burst) {
             req = new SplitDataRequest(&thread.at(tid), inst, isLoad, addr,
-                    size, flags, data, res, writeByteEnable);
+                    size, flags, data, res, byteEnable);
         } else {

             req = new SingleDataRequest(&thread.at(tid), inst, isLoad, addr,

-                    size, flags, data, res, writeByteEnable);
+                    size, flags, data, res, byteEnable);
         }
         assert(req);
         inst->setRequest();
@@ -726,6 +725,7 @@
             else
                 inst->getFault() = cpu->write(req, data, inst->sqIdx);
         } else if (isLoad) {
+            inst->setMemAccPredicate(false);
             // Commit will have to clean up whatever happened.  Set this
             // instruction as executed.
             inst->setExecuted();
@@ -818,14 +818,26 @@
 void
 LSQ<Impl>::SingleDataRequest::initiateTranslation()
 {
-    _inst->translationStarted(true);
-    setState(State::Translation);
-    flags[(int)Flag::TranslationStarted] = true;
+    assert(_requests.size() == 0);

-    _inst->savedReq = this;
-    sendFragmentToTranslation(0);
+    if (_byteEnable.empty() ||
+        isAnyActiveElement(_byteEnable.begin(), _byteEnable.end())) {

+        _requests.push_back(std::make_shared<Request>(_inst->getASID(), _addr,

+                _size, _flags, _inst->masterId(), _inst->instAddr(),
+                _inst->contextId(), _byteEnable));
+    }

-    if (isTranslationComplete()) {
+    if (_requests.size() > 0) {
+        _requests.back()->setReqInstSeqNum(_inst->seqNum);
+        _requests.back()->taskId(_taskId);
+        _inst->translationStarted(true);
+        setState(State::Translation);
+        flags[(int)Flag::TranslationStarted] = true;
+
+        _inst->savedReq = this;
+        sendFragmentToTranslation(0);
+    } else {
+        _inst->setMemAccPredicate(false);
     }
 }

@@ -847,11 +859,7 @@
 void
 LSQ<Impl>::SplitDataRequest::initiateTranslation()
 {
-    _inst->translationStarted(true);
-    setState(State::Translation);
-    flags[(int)Flag::TranslationStarted] = true;
-
-    unsigned int cacheLineSize = _port.cacheLineSize();
+    auto cacheLineSize = _port.cacheLineSize();
     Addr base_addr = _addr;
     Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
     Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
@@ -860,9 +868,7 @@
     mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
                 _size, _flags, _inst->masterId(),
                 _inst->instAddr(), _inst->contextId(),
-                this->_writeByteEnable);
-
-    auto& writeByteEnable = mainReq->getWriteByteEnable();
+                _byteEnable);

     // Paddr is not used in mainReq. However, we will accumulate the flags

     // from the sub requests into mainReq by calling setFlags() in finish().

@@ -871,35 +877,38 @@
     mainReq->setPaddr(0);

     /* Get the pre-fix, possibly unaligned. */
-    if (writeByteEnable.empty()) {
+    if (_byteEnable.empty()) {
         _requests.push_back(std::make_shared<Request>(_inst->getASID(),
                     base_addr, next_addr - base_addr, _flags,

                     _inst->masterId(), _inst->instAddr(), _inst->contextId()));

     } else {
-        auto it_start = writeByteEnable.begin();
-        auto it_end = writeByteEnable.begin() + (next_addr - base_addr);
-        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+        auto it_start = _byteEnable.begin();
+        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
+        if (isAnyActiveElement(it_start, it_end)) {
+            _requests.push_back(std::make_shared<Request>(_inst->getASID(),
                     base_addr, next_addr - base_addr, _flags,

                     _inst->masterId(), _inst->instAddr(), _inst->contextId(),

                     std::vector<bool>(it_start, it_end)));
+        }
     }
     size_so_far = next_addr - base_addr;

     /* We are block aligned now, reading whole blocks. */
     base_addr = next_addr;
     while (base_addr != final_addr) {
-        if (writeByteEnable.empty()) {
+        if (_byteEnable.empty()) {
             _requests.push_back(std::make_shared<Request>(_inst->getASID(),

                         base_addr, cacheLineSize, _flags, _inst->masterId(),

                         _inst->instAddr(), _inst->contextId()));
         } else {
-            auto it_start = writeByteEnable.begin() + size_so_far;
-            auto it_end = writeByteEnable.begin() + size_so_far +
-                cacheLineSize;
-            _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+            auto it_start = _byteEnable.begin() + size_so_far;

+            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;

+            if (isAnyActiveElement(it_start, it_end)) {

+                _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+                        base_addr, cacheLineSize, _flags, _inst->masterId(),

                         _inst->instAddr(), _inst->contextId(),
                         std::vector<bool>(it_start, it_end)));
+            }
         }
         size_so_far += cacheLineSize;
         base_addr += cacheLineSize;
@@ -907,33 +916,44 @@

     /* Deal with the tail. */
     if (size_so_far < _size) {
-        if (writeByteEnable.empty()) {
+        if (_byteEnable.empty()) {
             _requests.push_back(std::make_shared<Request>(_inst->getASID(),
                         base_addr, _size - size_so_far, _flags,
                         _inst->masterId(), _inst->instAddr(),
                         _inst->contextId()));
         } else {
-            auto it_start = writeByteEnable.begin() + size_so_far;
-            auto it_end = writeByteEnable.end();
-            _requests.push_back(std::make_shared<Request>(_inst->getASID(),
+            auto it_start = _byteEnable.begin() + size_so_far;
+            auto it_end = _byteEnable.end();
+            if (isAnyActiveElement(it_start, it_end)) {

+                _requests.push_back(std::make_shared<Request>(_inst->getASID(),

                         base_addr, _size - size_so_far, _flags,
                         _inst->masterId(), _inst->instAddr(),
                         _inst->contextId(),
                         std::vector<bool>(it_start, it_end)));
+            }
         }
     }

-    /* Setup the requests and send them to translation. */
-    for (auto& r: _requests) {
-        r->setReqInstSeqNum(_inst->seqNum);
-        r->taskId(_taskId);
-    }
-    this->_inst->savedReq = this;
-    numInTranslationFragments = 0;
-    numTranslatedFragments = 0;
+    if (_requests.size() > 0) {
+        /* Setup the requests and send them to translation. */
+        for (auto& r: _requests) {
+            r->setReqInstSeqNum(_inst->seqNum);
+            r->taskId(_taskId);
+        }

-    for (uint32_t i = 0; i < _requests.size(); i++) {
-        sendFragmentToTranslation(i);
+        _inst->translationStarted(true);
+        setState(State::Translation);
+        flags[(int)Flag::TranslationStarted] = true;
+        this->_inst->savedReq = this;
+        numInTranslationFragments = 0;
+        numTranslatedFragments = 0;
+        _fault.resize(_requests.size());
+
+        for (uint32_t i = 0; i < _requests.size(); i++) {
+            sendFragmentToTranslation(i);
+        }
+    } else {
+        _inst->setMemAccPredicate(false);
     }
 }

@@ -971,8 +991,6 @@
     while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
         pktIdx++;
     assert(pktIdx < _packets.size());
-    assert(pkt->req == _requests[pktIdx]);
-    assert(pkt == _packets[pktIdx]);
     numReceivedPackets++;
     state->outstanding--;
     if (numReceivedPackets == _packets.size()) {
@@ -1015,16 +1033,19 @@
 LSQ<Impl>::SplitDataRequest::buildPackets()
 {
     /* Extra data?? */
-    ptrdiff_t offset = 0;
+    Addr base_address = _addr;
+
     if (_packets.size() == 0) {
         /* New stuff */
         if (isLoad()) {
             _mainPacket = Packet::createRead(mainReq);
             _mainPacket->dataStatic(_inst->memData);
         }
-        for (auto& r: _requests) {

+        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {

+            RequestPtr r = _requests[i];
             PacketPtr pkt = isLoad() ? Packet::createRead(r)
-                                    : Packet::createWrite(r);
+                                     : Packet::createWrite(r);
+            ptrdiff_t offset = r->getVaddr() - base_address;
             if (isLoad()) {
                 pkt->dataStatic(_inst->memData + offset);
             } else {
@@ -1034,12 +1055,11 @@
                         r->getSize());
                 pkt->dataDynamic(req_data);
             }
-            offset += r->getSize();
             pkt->senderState = _senderState;
             _packets.push_back(pkt);
         }
     }
-    assert(_packets.size() == _requests.size());
+    assert(_packets.size() > 0);
 }

 template<class Impl>
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 66b2f75b..8b2c9d0 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -334,7 +334,8 @@

 Fault
 AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
-                         Request::Flags flags)
+                         Request::Flags flags,
+                         const std::vector<bool>& byteEnable)
 {
     SimpleExecContext& t_info = *threadInfo[curThread];
     SimpleThread* thread = t_info.thread;
@@ -352,22 +353,42 @@
     Addr frag_addr = addr;
     int frag_size = 0;
     int size_left = size;
+    bool predicate;
+    Fault fault = NoFault;

     while (1) {
+        predicate = true;
         frag_size = std::min(
             cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
             (Addr) size_left);
         size_left -= frag_size;

-        req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),
-                     thread->pcState().instAddr());
+        if (!byteEnable.empty()) {
+            // Set up byte-enable mask for the current fragment
+            auto it_start = byteEnable.begin() + (size - (frag_size +
+                                                          size_left));
+            auto it_end = byteEnable.begin() + (size - size_left);
+            if (isAnyActiveElement(it_start, it_end)) {

+                req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),

+                             thread->pcState().instAddr());
+                req->setByteEnable(std::vector<bool>(it_start, it_end));
+            } else {
+                predicate = false;
+            }
+        } else {
+            req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),
+                         thread->pcState().instAddr());
+        }

         // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
-                                                   BaseTLB::Read);
+        if (predicate) {
+            fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                 BaseTLB::Read);
+        }

         // Now do the access.

-        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {

+        if (predicate && fault == NoFault &&
+            !req->getFlags().isSet(Request::NO_ACCESS)) {
             Packet pkt(req, Packet::makeReadCmd(req));
             pkt.dataStatic(data);

@@ -415,7 +436,8 @@

 Fault
 AtomicSimpleCPU::initiateMemRead(Addr addr, unsigned size,
-                                 Request::Flags flags)
+                                 Request::Flags flags,
+                                 const std::vector<bool>& byteEnable)
 {
     panic("initiateMemRead() is for timing accesses, and should "
           "never be called on AtomicSimpleCPU.\n");
@@ -451,8 +473,11 @@
     int frag_size = 0;
     int size_left = size;
     int curr_frag_id = 0;
+    bool predicate;
+    Fault fault = NoFault;

     while (1) {
+        predicate = true;
         frag_size = std::min(
             cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
             (Addr) size_left);
@@ -467,15 +492,25 @@
                                                           size_left));
             auto it_end = byteEnable.begin() + (size - size_left);

-            req->setWriteByteEnable(std::vector<bool>(it_start, it_end));
+            if (isAnyActiveElement(it_start, it_end)) {

+                req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),

+                             thread->pcState().instAddr());
+                req->setByteEnable(std::vector<bool>(it_start, it_end));
+            } else {
+                predicate = false;
+            }
+        } else {
+            req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(),
+                         thread->pcState().instAddr());
         }

         // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
-                                                   BaseTLB::Write);
+        if (predicate)
+            fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                 BaseTLB::Write);

         // Now do the access.
-        if (fault == NoFault) {
+        if (predicate && fault == NoFault) {
             bool do_access = true;  // flag to suppress cache access

             if (req->isLLSC()) {
@@ -523,6 +558,7 @@
         if (fault != NoFault || size_left == 0)
         {
             if (req->isLockedRMW() && fault == NoFault) {
+                assert(byteEnable.empty());
                 assert(locked && curr_frag_id == 0);
                 locked = false;
             }
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index e76cd2b..10388ca 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -195,10 +195,14 @@
     void suspendContext(ThreadID thread_num) override;

     Fault readMem(Addr addr, uint8_t *data, unsigned size,
-                  Request::Flags flags) override;
+                  Request::Flags flags,

+                  const std::vector<bool>& byteEnable = std::vector<bool>())

+        override;

     Fault initiateMemRead(Addr addr, unsigned size,
-                          Request::Flags flags) override;
+                          Request::Flags flags,
+                          const std::vector<bool>& byteEnable =
+                          std::vector<bool>()) override;

     Fault writeMem(uint8_t *data, unsigned size,
                    Addr addr, Request::Flags flags, uint64_t *res,
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index d717cac..3950589 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012,2015 ARM Limited
+ * Copyright (c) 2011-2012,2015,2018 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -143,10 +143,14 @@
     void startup() override;

     virtual Fault readMem(Addr addr, uint8_t* data, unsigned size,
-                          Request::Flags flags) = 0;
+                          Request::Flags flags,
+                          const std::vector<bool>& byteEnable =
+                          std::vector<bool>()) = 0;

     virtual Fault initiateMemRead(Addr addr, unsigned size,
-                                  Request::Flags flags) = 0;
+                                  Request::Flags flags,
+                                  const std::vector<bool>& byteEnable =
+                                  std::vector<bool>()) = 0;

     virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr,
                            Request::Flags flags, uint64_t* res,
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 0fc7cd6..5a4ab5b 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -445,15 +445,19 @@


     Fault readMem(Addr addr, uint8_t *data, unsigned int size,
-                  Request::Flags flags) override
+                  Request::Flags flags,

+                  const std::vector<bool>& byteEnable = std::vector<bool>())

+        override
     {
-        return cpu->readMem(addr, data, size, flags);
+        return cpu->readMem(addr, data, size, flags, byteEnable);
     }

     Fault initiateMemRead(Addr addr, unsigned int size,
-                          Request::Flags flags) override
+                          Request::Flags flags,
+                          const std::vector<bool>& byteEnable =
+                          std::vector<bool>()) override
     {
-        return cpu->initiateMemRead(addr, size, flags);
+        return cpu->initiateMemRead(addr, size, flags, byteEnable);
     }

     Fault writeMem(uint8_t *data, unsigned int size, Addr addr,
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index c03f601..cf88360 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -1,6 +1,6 @@
 /*
  * Copyright 2014 Google, Inc.
- * Copyright (c) 2010-2013,2015,2017 ARM Limited
+ * Copyright (c) 2010-2013,2015,2017-2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -416,7 +416,8 @@

 Fault
 TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
-                         unsigned size, Request::Flags flags)
+                         unsigned size, Request::Flags flags,
+                         const std::vector<bool>& byteEnable)
 {
     panic("readMem() is for atomic accesses, and should "
           "never be called on TimingSimpleCPU.\n");
@@ -424,7 +425,8 @@

 Fault
 TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
-                                 Request::Flags flags)
+                                 Request::Flags flags,
+                                 const std::vector<bool>& byteEnable)
 {
     SimpleExecContext &t_info = *threadInfo[curThread];
     SimpleThread* thread = t_info.thread;
@@ -440,7 +442,7 @@

     RequestPtr req = std::make_shared<Request>(
         asid, addr, size, flags, dataMasterId(), pc,
-        thread->contextId());
+        thread->contextId(), byteEnable);

     req->taskId(taskId());

diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 05111aa..0d1b53f 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013,2015 ARM Limited
+ * Copyright (c) 2012-2013,2015,2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -283,10 +283,14 @@
     void suspendContext(ThreadID thread_num) override;

     Fault readMem(Addr addr, uint8_t *data, unsigned size,
-                  Request::Flags flags) override;
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;

     Fault initiateMemRead(Addr addr, unsigned size,
-                          Request::Flags flags) override;
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable =std::vector<bool>())
+        override;

     Fault writeMem(uint8_t *data, unsigned size,
                    Addr addr, Request::Flags flags, uint64_t *res,
diff --git a/src/cpu/utils.hh b/src/cpu/utils.hh
index f2cc089..32bfdab 100644
--- a/src/cpu/utils.hh
+++ b/src/cpu/utils.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017-2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -65,4 +65,16 @@
     return (addrBlockOffset(addr, block_size) + size) > block_size;
 }

+/**
+ * Test if there is any active element in an enablement range
+ */
+inline bool
+isAnyActiveElement(const std::vector<bool>::const_iterator& it_start,
+                   const std::vector<bool>::const_iterator& it_end)
+{
+    auto it_tmp = it_start;
+    for (;it_tmp != it_end && !(*it_tmp); ++it_tmp);
+    return (it_tmp != it_end);
+}
+
 #endif // __CPU_UTILS_HH__
diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
index 1508460..aaa2aa7 100644
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012,2017 ARM Limited
+ * Copyright (c) 2010-2012,2017-2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -401,7 +401,7 @@
             if (pmemAddr) {
                 if (pkt->isMaskedWrite()) {
                     for (int i = 0; i < pkt->getSize(); i++) {
-                        if (pkt->req->getWriteByteEnable()[i]) {
+                        if (pkt->req->getByteEnable()[i]) {
                             hostAddr[i] = pkt->getConstPtr<uint8_t>()[i];
                         }
                     }
@@ -443,7 +443,7 @@
         if (pmemAddr) {
             if (pkt->isMaskedWrite()) {
                 for (int i = 0; i < pkt->getSize(); i++) {
-                    if (pkt->req->getWriteByteEnable()[i]) {
+                    if (pkt->req->getByteEnable()[i]) {
                         hostAddr[i] = pkt->getConstPtr<uint8_t>()[i];
                     }
                 }
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index ef852c7..be39751 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -1029,7 +1029,7 @@
     getPtr()
     {
         assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA));
-        assert(!isWrite() || req->getWriteByteEnable().empty());
+        assert(!isMaskedWrite());
         return (T*)data;
     }

@@ -1118,10 +1118,11 @@
         // same pointer from source to destination and back
         assert(p != getPtr<uint8_t>() || flags.isSet(STATIC_DATA));

-        if (p != getPtr<uint8_t>())
+        if (p != getPtr<uint8_t>()) {
             // for packet with allocated dynamic data, we copy data from
             // one to the other, e.g. a forwarded response to a response
             std::memcpy(getPtr<uint8_t>(), p, getSize());
+        }
     }

     /**
@@ -1141,12 +1142,13 @@
     void
     writeData(uint8_t *p) const
     {
-        if (req->getWriteByteEnable().empty()) {
+        if (!isMaskedWrite()) {
             std::memcpy(p, getConstPtr<uint8_t>(), getSize());
         } else {
+            assert(req->getByteEnable().size() == getSize());
             // Write only the enabled bytes
             for (int i = 0; i < getSize(); i++) {
-                if (req->getWriteByteEnable()[i]) {
+                if (req->getByteEnable()[i]) {
                     p[i] = *(getConstPtr<uint8_t>() + i);
                 }
                 // Disabled bytes stay untouched
@@ -1216,7 +1218,7 @@
     bool
     trySatisfyFunctional(PacketPtr other)
     {

-        if (other->isWrite() && !other->req->getWriteByteEnable().empty()) {

+        if (other->isMaskedWrite()) {
             if (getAddr() <= (other->getAddr() + other->getSize() - 1) &&
                 other->getAddr() <= (getAddr() + getSize() - 1)) {
                 warn("Trying to check against a masked write, skipping."
@@ -1256,7 +1258,7 @@
     bool
     isMaskedWrite() const
     {
-        return !req->getWriteByteEnable().empty();
+        return (cmd == MemCmd::WriteReq && !req->getByteEnable().empty());
     }

     /**
diff --git a/src/mem/request.hh b/src/mem/request.hh
index d92c584..10985ad 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -321,7 +321,7 @@
     unsigned _size;

     /** Byte-enable mask for writes. */
-    std::vector<bool> _writeByteEnable;
+    std::vector<bool> _byteEnable;

     /** The requestor ID which is unique in the system for all ports
      * that are capable of issuing a transaction
@@ -458,7 +458,7 @@

     Request(uint64_t asid, Addr vaddr, unsigned size, Flags flags,
             MasterID mid, Addr pc, ContextID cid,
-            const std::vector<bool>& writeByteEnable = std::vector<bool>())
+            const std::vector<bool>& byteEnable = std::vector<bool>())
         : _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
           _taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
           _extraData(0), _contextId(0), _pc(0),
@@ -466,7 +466,7 @@
           accessDelta(0), depth(0)
     {
         setVirt(asid, vaddr, size, flags, mid, pc);
-        setWriteByteEnable(writeByteEnable);
+        setByteEnable(byteEnable);
         setContext(cid);
     }

@@ -585,13 +585,13 @@
         req1->_size = split_addr - _vaddr;
         req2->_vaddr = split_addr;
         req2->_size = _size - req1->_size;
-        if (!_writeByteEnable.empty()) {
-            req1->_writeByteEnable = std::vector<bool>(
-                _writeByteEnable.begin(),
-                _writeByteEnable.begin() + req1->_size);
-            req2->_writeByteEnable = std::vector<bool>(
-                _writeByteEnable.begin() + req1->_size,
-                _writeByteEnable.end());
+        if (!_byteEnable.empty()) {
+            req1->_byteEnable = std::vector<bool>(
+                _byteEnable.begin(),
+                _byteEnable.begin() + req1->_size);
+            req2->_byteEnable = std::vector<bool>(
+                _byteEnable.begin() + req1->_size,
+                _byteEnable.end());
         }
     }

@@ -645,16 +645,16 @@
     }

     const std::vector<bool>&
-    getWriteByteEnable() const
+    getByteEnable() const
     {
-        return _writeByteEnable;
+        return _byteEnable;
     }

     void
-    setWriteByteEnable(const std::vector<bool>& wbe)
+    setByteEnable(const std::vector<bool>& be)
     {
-        assert(wbe.empty() || wbe.size() == _size);
-        _writeByteEnable = wbe;
+        assert(be.empty() || be.size() == _size);
+        _byteEnable = be;
     }

     /** Accessor for time. */

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/16868

To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I9c91fdbe192af56e95a98a20490fa386a2966f24
Gerrit-Change-Number: 16868
Gerrit-PatchSet: 1
Gerrit-Owner: Gabor Dozsa <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev

[gem5-dev] Change in gem5/gem5[master]: mem, cpu: Add support for masked reads and clean up masked writes

Reply via email to