Gabe Black has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/40175 )

Change subject: arch,cpu: Make the decoder width a property of the decoder.
......................................................................

arch,cpu: Make the decoder width a property of the decoder.

In this context, the decoder width is the number of bytes that are fed
into the decoder at once. This is frequently the same as the size of an
instruction, but in ISAs with occasionally variable instruction sizes
(ARM, RISCV), or extremely variable instruction sizes (x86), there may
be no relation.

Rather than determining the amount of data to feed to the decoder based
on a MachInst type defined by each ISA, this new interface adds some new
properties to the base InstDecoder class that each arch-specific decoder
inherits from. These are the size of the incoming buffer, a pointer to
wherever that data should end up, and a mask for masking a PC value so
it aligns with the instruction size.

These values are filled in by an InstDecoder constructor which is
templated on what would have historically been the MachInst type.

Because the "moreBytes" method would historically accept a parameter of
type MachInst, this parameter has also been eliminated. Now, the
decoder's parent object should use the pointer and size values to fill
in the buffer moreBytes reads. Then when moreBytes is called, it just
uses the buffer without having to expose its type externally.

Change-Id: I0642cdb6a61e152441ca4ce47d748639175cda90
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/40175
Reviewed-by: Gabe Black <gabe.bl...@gmail.com>
Maintainer: Gabe Black <gabe.bl...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/arch/arm/decoder.cc
M src/arch/arm/decoder.hh
M src/arch/generic/decoder.hh
M src/arch/mips/decoder.hh
M src/arch/power/decoder.hh
M src/arch/riscv/decoder.cc
M src/arch/riscv/decoder.hh
M src/arch/sparc/decoder.hh
M src/arch/x86/decoder.hh
M src/cpu/base.hh
M src/cpu/checker/cpu_impl.hh
M src/cpu/minor/fetch1.cc
M src/cpu/minor/fetch2.cc
M src/cpu/o3/fetch.cc
M src/cpu/simple/atomic.cc
M src/cpu/simple/base.cc
M src/cpu/simple/base.hh
M src/cpu/simple/noncaching.cc
M src/cpu/simple/timing.cc
19 files changed, 130 insertions(+), 116 deletions(-)

Approvals:
  Gabe Black: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc
index d18cc83..4c1ec40 100644
--- a/src/arch/arm/decoder.cc
+++ b/src/arch/arm/decoder.cc
@@ -52,7 +52,7 @@
 GenericISA::BasicDecodeCache<Decoder, ExtMachInst> Decoder::defaultCache;

 Decoder::Decoder(ISA* isa)
-    : data(0), fpscrLen(0), fpscrStride(0),
+    : InstDecoder(&data), data(0), fpscrLen(0), fpscrStride(0),
       decoderFlavor(isa->decoderFlavor())
 {
     reset();
@@ -143,15 +143,15 @@
 Decoder::consumeBytes(int numBytes)
 {
     offset += numBytes;
-    assert(offset <= sizeof(MachInst) || emi.decoderFault);
-    if (offset == sizeof(MachInst))
+    assert(offset <= sizeof(data) || emi.decoderFault);
+    if (offset == sizeof(data))
         outOfBytes = true;
 }

 void
-Decoder::moreBytes(const PCState &pc, Addr fetchPC, MachInst inst)
+Decoder::moreBytes(const PCState &pc, Addr fetchPC)
 {
-    data = letoh(inst);
+    data = letoh(data);
     offset = (fetchPC >= pc.instAddr()) ? 0 : pc.instAddr() - fetchPC;
     emi.thumb = pc.thumb();
     emi.aarch64 = pc.aarch64();
diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh
index a536fe2..7f63f32 100644
--- a/src/arch/arm/decoder.hh
+++ b/src/arch/arm/decoder.hh
@@ -61,7 +61,7 @@
   protected:
     //The extended machine instruction being generated
     ExtMachInst emi;
-    MachInst data;
+    uint32_t data;
     bool bigThumb;
     bool instDone;
     bool outOfBytes;
@@ -179,7 +179,7 @@
      * @param fetchPC The address this chunk was fetched from.
      * @param inst Raw instruction data.
      */
-    void moreBytes(const PCState &pc, Addr fetchPC, MachInst inst);
+    void moreBytes(const PCState &pc, Addr fetchPC);

     /**
      * Decode an instruction or fetch it from the code cache.
diff --git a/src/arch/generic/decoder.hh b/src/arch/generic/decoder.hh
index 3df4dcc..cb6e4cd 100644
--- a/src/arch/generic/decoder.hh
+++ b/src/arch/generic/decoder.hh
@@ -28,14 +28,31 @@
 #ifndef __ARCH_GENERIC_DECODER_HH__
 #define __ARCH_GENERIC_DECODER_HH__

+#include "base/bitfield.hh"
+#include "base/intmath.hh"
 #include "base/types.hh"
 #include "cpu/static_inst_fwd.hh"

 class InstDecoder
 {
+  protected:
+    void *_moreBytesPtr;
+    size_t _moreBytesSize;
+    Addr _pcMask;
+
   public:
+    template <typename MoreBytesType>
+    InstDecoder(MoreBytesType *mb_buf) :
+        _moreBytesPtr(mb_buf), _moreBytesSize(sizeof(MoreBytesType)),
+        _pcMask(~mask(floorLog2(_moreBytesSize)))
+    {}
+
     virtual StaticInstPtr fetchRomMicroop(
             MicroPC micropc, StaticInstPtr curMacroop);
+
+    void *moreBytesPtr() const { return _moreBytesPtr; }
+    size_t moreBytesSize() const { return _moreBytesSize; }
+    Addr pcMask() const { return _pcMask; }
 };

 #endif // __ARCH_DECODER_GENERIC_HH__
diff --git a/src/arch/mips/decoder.hh b/src/arch/mips/decoder.hh
index 969c152..4b8a84c 100644
--- a/src/arch/mips/decoder.hh
+++ b/src/arch/mips/decoder.hh
@@ -46,10 +46,11 @@
   protected:
     //The extended machine instruction being generated
     ExtMachInst emi;
+    uint32_t machInst;
     bool instDone;

   public:
-    Decoder(ISA* isa = nullptr) : instDone(false)
+    Decoder(ISA* isa = nullptr) : InstDecoder(&machInst), instDone(false)
     {}

     void
@@ -66,9 +67,9 @@
     //Use this to give data to the decoder. This should be used
     //when there is control flow.
     void
-    moreBytes(const PCState &pc, Addr fetchPC, MachInst inst)
+    moreBytes(const PCState &pc, Addr fetchPC)
     {
-        emi = letoh(inst);
+        emi = letoh(machInst);
         instDone = true;
     }

diff --git a/src/arch/power/decoder.hh b/src/arch/power/decoder.hh
index bd32614..eb67ed0 100644
--- a/src/arch/power/decoder.hh
+++ b/src/arch/power/decoder.hh
@@ -47,9 +47,7 @@
     bool instDone;

   public:
-    Decoder(ISA* isa = nullptr) : instDone(false)
-    {
-    }
+    Decoder(ISA* isa=nullptr) : InstDecoder(&emi), instDone(false) {}

     void
     process()
@@ -65,20 +63,12 @@
     // Use this to give data to the predecoder. This should be used
     // when there is control flow.
     void
-    moreBytes(const PCState &pc, Addr fetchPC, MachInst inst)
+    moreBytes(const PCState &pc, Addr fetchPC)
     {
-        emi = betoh(inst);
+        emi = betoh(emi);
         instDone = true;
     }

-    // Use this to give data to the predecoder. This should be used
-    // when instructions are executed in order.
-    void
-    moreBytes(MachInst machInst)
-    {
-        moreBytes(0, 0, machInst);
-    }
-
     bool
     needMoreBytes()
     {
diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc
index 26b6adb..8f128b0 100644
--- a/src/arch/riscv/decoder.cc
+++ b/src/arch/riscv/decoder.cc
@@ -29,14 +29,12 @@

 #include "arch/riscv/decoder.hh"
 #include "arch/riscv/types.hh"
+#include "base/bitfield.hh"
 #include "debug/Decode.hh"

 namespace RiscvISA
 {

-static const MachInst LowerBitMask = (1 << sizeof(MachInst) * 4) - 1;
-static const MachInst UpperBitMask = LowerBitMask << sizeof(MachInst) * 4;
-
 void Decoder::reset()
 {
     aligned = true;
@@ -47,28 +45,32 @@
 }

 void
-Decoder::moreBytes(const PCState &pc, Addr fetchPC, MachInst inst)
+Decoder::moreBytes(const PCState &pc, Addr fetchPC)
 {
-    inst = letoh(inst);
+    // The MSB of the upper and lower halves of a machine instruction.
+    constexpr size_t max_bit = sizeof(machInst) * 8 - 1;
+    constexpr size_t mid_bit = sizeof(machInst) * 4 - 1;
+
+    auto inst = letoh(machInst);
     DPRINTF(Decode, "Requesting bytes 0x%08x from address %#x\n", inst,
             fetchPC);

-    bool aligned = pc.pc() % sizeof(MachInst) == 0;
+    bool aligned = pc.pc() % sizeof(machInst) == 0;
     if (aligned) {
         emi = inst;
         if (compressed(emi))
-            emi &= LowerBitMask;
+            emi = bits(emi, mid_bit, 0);
         more = !compressed(emi);
         instDone = true;
     } else {
         if (mid) {
-            assert((emi & UpperBitMask) == 0);
-            emi |= (inst & LowerBitMask) << sizeof(MachInst)*4;
+            assert(bits(emi, max_bit, mid_bit + 1) == 0);
+            replaceBits(emi, max_bit, mid_bit + 1, inst);
             mid = false;
             more = false;
             instDone = true;
         } else {
-            emi = (inst & UpperBitMask) >> sizeof(MachInst)*4;
+            emi = bits(inst, max_bit, mid_bit + 1);
             mid = !compressed(emi);
             more = true;
             instDone = compressed(emi);
@@ -99,9 +101,9 @@
     instDone = false;

     if (compressed(emi)) {
-        nextPC.npc(nextPC.instAddr() + sizeof(MachInst) / 2);
+        nextPC.npc(nextPC.instAddr() + sizeof(machInst) / 2);
     } else {
-        nextPC.npc(nextPC.instAddr() + sizeof(MachInst));
+        nextPC.npc(nextPC.instAddr() + sizeof(machInst));
     }

     return decode(emi, nextPC.instAddr());
diff --git a/src/arch/riscv/decoder.hh b/src/arch/riscv/decoder.hh
index f1a9b2e..9b3873d 100644
--- a/src/arch/riscv/decoder.hh
+++ b/src/arch/riscv/decoder.hh
@@ -53,6 +53,7 @@
   protected:
     //The extended machine instruction being generated
     ExtMachInst emi;
+    uint32_t machInst;
     bool instDone;

     StaticInstPtr decodeInst(ExtMachInst mach_inst);
@@ -63,7 +64,7 @@
     StaticInstPtr decode(ExtMachInst mach_inst, Addr addr);

   public:
-    Decoder(ISA* isa=nullptr) { reset(); }
+    Decoder(ISA* isa=nullptr) : InstDecoder(&machInst) { reset(); }

     void process() {}
     void reset();
@@ -72,7 +73,7 @@

     //Use this to give data to the decoder. This should be used
     //when there is control flow.
-    void moreBytes(const PCState &pc, Addr fetchPC, MachInst inst);
+    void moreBytes(const PCState &pc, Addr fetchPC);

     bool needMoreBytes() { return more; }
     bool instReady() { return instDone; }
diff --git a/src/arch/sparc/decoder.hh b/src/arch/sparc/decoder.hh
index 4530f77..7a3669b 100644
--- a/src/arch/sparc/decoder.hh
+++ b/src/arch/sparc/decoder.hh
@@ -44,11 +44,12 @@
   protected:
     // The extended machine instruction being generated
     ExtMachInst emi;
+    uint32_t machInst;
     bool instDone;
     RegVal asi;

   public:
-    Decoder(ISA* isa = nullptr) : instDone(false), asi(0)
+ Decoder(ISA* isa=nullptr) : InstDecoder(&machInst), instDone(false), asi(0)
     {}

     void process() {}
@@ -62,19 +63,19 @@
     // Use this to give data to the predecoder. This should be used
     // when there is control flow.
     void
-    moreBytes(const PCState &pc, Addr fetchPC, MachInst inst)
+    moreBytes(const PCState &pc, Addr fetchPC)
     {
-        emi = betoh(inst);
+        emi = betoh(machInst);
         // The I bit, bit 13, is used to figure out where the ASI
         // should come from. Use that in the ExtMachInst. This is
         // slightly redundant, but it removes the need to put a condition
         // into all the execute functions
         if (emi & (1 << 13)) {
             emi |= (static_cast<ExtMachInst>(
-                        asi << (sizeof(MachInst) * 8)));
+                        asi << (sizeof(machInst) * 8)));
         } else {
             emi |= (static_cast<ExtMachInst>(bits(emi, 12, 5))
-                    << (sizeof(MachInst) * 8));
+                    << (sizeof(machInst) * 8));
         }
         instDone = true;
     }
diff --git a/src/arch/x86/decoder.hh b/src/arch/x86/decoder.hh
index 6f51c1a..5bea939 100644
--- a/src/arch/x86/decoder.hh
+++ b/src/arch/x86/decoder.hh
@@ -71,6 +71,8 @@
     static X86ISAInst::MicrocodeRom microcodeRom;

   protected:
+    using MachInst = uint64_t;
+
     struct InstBytes
     {
         StaticInstPtr si;
@@ -250,7 +252,7 @@
     StaticInstPtr decode(ExtMachInst mach_inst, Addr addr);

   public:
-    Decoder(ISA *isa=nullptr)
+    Decoder(ISA *isa=nullptr) : InstDecoder(&fetchChunk)
     {
         emi.reset();
         emi.mode.mode = mode;
@@ -308,12 +310,12 @@
     // Use this to give data to the decoder. This should be used
     // when there is control flow.
     void
-    moreBytes(const PCState &pc, Addr fetchPC, MachInst data)
+    moreBytes(const PCState &pc, Addr fetchPC)
     {
         DPRINTF(Decoder, "Getting more bytes.\n");
         basePC = fetchPC;
         offset = (fetchPC >= pc.instAddr()) ? 0 : pc.instAddr() - fetchPC;
-        fetchChunk = letoh(data);
+        fetchChunk = letoh(fetchChunk);
         outOfBytes = false;
         process();
     }
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index daa0813..c113829 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -267,9 +267,6 @@
      *  or has not assigned a pid yet */
     static const uint32_t invldPid = std::numeric_limits<uint32_t>::max();

-    // Mask to align PCs to MachInst sized boundaries
-    static const Addr PCMask = ~((Addr)sizeof(TheISA::MachInst) - 1);
-
     /// Provide access to the tracer pointer
     Trace::InstTracer * getTracer() { return tracer; }

diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh
index de123d4..7b906d3 100644
--- a/src/cpu/checker/cpu_impl.hh
+++ b/src/cpu/checker/cpu_impl.hh
@@ -180,6 +180,9 @@
     unverifiedInst = inst;
     inst = NULL;

+    auto &decoder = thread->decoder;
+    const Addr pc_mask = decoder.pcMask();
+
     // Try to check all instructions that are completed, ending if we
     // run out of instructions to check or if an instruction is not
     // yet completed.
@@ -223,21 +226,18 @@
         // Try to fetch the instruction
         uint64_t fetchOffset = 0;
         bool fetchDone = false;
-
         while (!fetchDone) {
             Addr fetch_PC = thread->instAddr();
-            fetch_PC = (fetch_PC & PCMask) + fetchOffset;
-
-            TheISA::MachInst machInst;
+            fetch_PC = (fetch_PC & pc_mask) + fetchOffset;

             // If not in the middle of a macro instruction
             if (!curMacroStaticInst) {
                 // set up memory request for instruction fetch
                 auto mem_req = std::make_shared<Request>(
-                    fetch_PC, sizeof(TheISA::MachInst), 0, requestorId,
+                    fetch_PC, decoder.moreBytesSize(), 0, requestorId,
                     fetch_PC, thread->contextId());

-                mem_req->setVirt(fetch_PC, sizeof(TheISA::MachInst),
+                mem_req->setVirt(fetch_PC, decoder.moreBytesSize(),
                                  Request::INST_FETCH, requestorId,
                                  thread->instAddr());

@@ -271,7 +271,7 @@
                 } else {
                     PacketPtr pkt = new Packet(mem_req, MemCmd::ReadReq);

-                    pkt->dataStatic(&machInst);
+                    pkt->dataStatic(decoder.moreBytesPtr());
                     icachePort->sendFunctional(pkt);

                     delete pkt;
@@ -283,7 +283,7 @@

                 if (isRomMicroPC(pcState.microPC())) {
                     fetchDone = true;
-                    curStaticInst = thread->decoder.fetchRomMicroop(
+                    curStaticInst = decoder.fetchRomMicroop(
                             pcState.microPC(), nullptr);
                 } else if (!curMacroStaticInst) {
                     //We're not in the middle of a macro instruction
@@ -291,19 +291,20 @@

                     //Predecode, ie bundle up an ExtMachInst
                     //If more fetch data is needed, pass it in.
- Addr fetchPC = (pcState.instAddr() & PCMask) + fetchOffset;
-                    thread->decoder.moreBytes(pcState, fetchPC, machInst);
+                    Addr fetchPC =
+                        (pcState.instAddr() & pc_mask) + fetchOffset;
+                    decoder.moreBytes(pcState, fetchPC);

                     //If an instruction is ready, decode it.
                     //Otherwise, we'll have to fetch beyond the
-                    //MachInst at the current pc.
-                    if (thread->decoder.instReady()) {
+                    //memory chunk at the current pc.
+                    if (decoder.instReady()) {
                         fetchDone = true;
-                        instPtr = thread->decoder.decode(pcState);
+                        instPtr = decoder.decode(pcState);
                         thread->pcState(pcState);
                     } else {
                         fetchDone = false;
-                        fetchOffset += sizeof(TheISA::MachInst);
+                        fetchOffset += decoder.moreBytesSize();
                     }

                     //If we decoded an instruction and it's microcoded,
@@ -324,7 +325,7 @@
             }
         }
         // reset decoder on Checker
-        thread->decoder.reset();
+        decoder.reset();

         // Check Checker and CPU get same instruction, and record
         // any faults the CPU may have had.
diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
index 9b774c0..340bba5 100644
--- a/src/cpu/minor/fetch1.cc
+++ b/src/cpu/minor/fetch1.cc
@@ -91,20 +91,19 @@
             maxLineWidth);
     }

+    size_t inst_size = cpu.threads[0]->decoder.moreBytesSize();
+
     /* These assertions should be copied to the Python config. as well */
-    if ((lineSnap % sizeof(TheISA::MachInst)) != 0) {
+    if ((lineSnap % inst_size) != 0) {
         fatal("%s: fetch1LineSnapWidth must be a multiple "
-            "of sizeof(TheISA::MachInst) (%d)\n", name_,
-            sizeof(TheISA::MachInst));
+            "of the inst width (%d)\n", name_,
+            inst_size);
     }

-    if (!(maxLineWidth >= lineSnap &&
-        (maxLineWidth % sizeof(TheISA::MachInst)) == 0))
-    {
+    if ((maxLineWidth >= lineSnap && (maxLineWidth % inst_size)) != 0) {
         fatal("%s: fetch1LineWidth must be a multiple of"
-            " sizeof(TheISA::MachInst)"
-            " (%d), and >= fetch1LineSnapWidth (%d)\n",
-            name_, sizeof(TheISA::MachInst), lineSnap);
+            " the inst width (%d), and >= fetch1LineSnapWidth (%d)\n",
+            name_, inst_size, lineSnap);
     }

     if (fetchLimit < 1) {
diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc
index 4100f91..40c339f 100644
--- a/src/cpu/minor/fetch2.cc
+++ b/src/cpu/minor/fetch2.cc
@@ -329,7 +329,7 @@
                 /* Set the inputIndex to be the MachInst-aligned offset
                  *  from lineBaseAddr of the new PC value */
                 fetch_info.inputIndex =
-                    (line_in->pc.instAddr() & BaseCPU::PCMask) -
+                    (line_in->pc.instAddr() & decoder->pcMask()) -
                     line_in->lineBaseAddr;
                 DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x"
                     " lineBaseAddr: 0x%x lineWidth: 0x%x\n",
@@ -376,15 +376,13 @@
             } else {
                 uint8_t *line = line_in->line;

-                /* The instruction is wholly in the line, can just
-                 *  assign */
-                auto inst_word = *reinterpret_cast<TheISA::MachInst *>
-                                  (line + fetch_info.inputIndex);
+                /* The instruction is wholly in the line, can just copy. */
+ memcpy(decoder->moreBytesPtr(), line + fetch_info.inputIndex,
+                        decoder->moreBytesSize());

                 if (!decoder->instReady()) {
                     decoder->moreBytes(fetch_info.pc,
-                        line_in->lineBaseAddr + fetch_info.inputIndex,
-                        inst_word);
+                        line_in->lineBaseAddr + fetch_info.inputIndex);
DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n",
                             line_in->lineBaseAddr + fetch_info.inputIndex);
                 }
@@ -466,7 +464,7 @@
                 /* Step on the pointer into the line if there's no
                  *  complete instruction waiting */
                 if (decoder->needMoreBytes()) {
-                    fetch_info.inputIndex += sizeof(TheISA::MachInst);
+                    fetch_info.inputIndex += decoder->moreBytesSize();

                 DPRINTF(Fetch, "Updated inputIndex value PC: %s"
" inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n",
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index 571e98d..72f4312 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -114,9 +114,6 @@
         fatal("cache block (%u bytes) is not a multiple of the "
               "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);

-    // Get the size of an instruction.
-    instSize = sizeof(TheISA::MachInst);
-
     for (int i = 0; i < MaxThreads; i++) {
         fetchStatus[i] = Idle;
         decoder[i] = nullptr;
@@ -142,6 +139,9 @@
         // which may not hold the entire cache line.
         fetchBuffer[tid] = new uint8_t[fetchBufferSize];
     }
+
+    // Get the size of an instruction.
+    instSize = decoder[0]->moreBytesSize();
 }

 std::string Fetch::name() const { return cpu->name() + ".fetch"; }
@@ -1117,7 +1117,7 @@
     TheISA::PCState thisPC = pc[tid];

     Addr pcOffset = fetchOffset[tid];
-    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
+ Addr fetchAddr = (thisPC.instAddr() + pcOffset) & decoder[tid]->pcMask();

     bool inRom = isRomMicroPC(thisPC.microPC());

@@ -1190,12 +1190,12 @@
     // Need to halt fetch if quiesce instruction detected
     bool quiesce = false;

-    TheISA::MachInst *cacheInsts =
-        reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);
-
     const unsigned numInsts = fetchBufferSize / instSize;
     unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;

+    auto *dec_ptr = decoder[tid];
+    const Addr pc_mask = dec_ptr->pcMask();
+
     // Loop through instruction memory from the cache.
     // Keep issuing while fetchWidth is available and branch is not
     // predicted taken
@@ -1204,9 +1204,8 @@
         // We need to process more memory if we aren't going to get a
         // StaticInst from the rom, the current macroop, or what's already
         // in the decoder.
-        bool needMem = !inRom && !curMacroop &&
-            !decoder[tid]->instReady();
-        fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
+        bool needMem = !inRom && !curMacroop && !dec_ptr->instReady();
+        fetchAddr = (thisPC.instAddr() + pcOffset) & pc_mask;
         Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);

         if (needMem) {
@@ -1222,9 +1221,11 @@
                 break;
             }

- decoder[tid]->moreBytes(thisPC, fetchAddr, cacheInsts[blkOffset]);
+            memcpy(dec_ptr->moreBytesPtr(),
+                    fetchBuffer[tid] + blkOffset * instSize, instSize);
+            decoder[tid]->moreBytes(thisPC, fetchAddr);

-            if (decoder[tid]->needMoreBytes()) {
+            if (dec_ptr->needMoreBytes()) {
                 blkOffset++;
                 fetchAddr += instSize;
                 pcOffset += instSize;
@@ -1235,8 +1236,8 @@
         // the memory we've processed so far.
         do {
             if (!(curMacroop || inRom)) {
-                if (decoder[tid]->instReady()) {
-                    staticInst = decoder[tid]->decode(thisPC);
+                if (dec_ptr->instReady()) {
+                    staticInst = dec_ptr->decode(thisPC);

                     // Increment stat of fetched instructions.
                     ++fetchStats.insts;
@@ -1258,7 +1259,7 @@
             bool newMacro = false;
             if (curMacroop || inRom) {
                 if (inRom) {
-                    staticInst = decoder[tid]->fetchRomMicroop(
+                    staticInst = dec_ptr->fetchRomMicroop(
                             thisPC.microPC(), curMacroop);
                 } else {
staticInst = curMacroop->fetchMicroop(thisPC.microPC());
@@ -1296,7 +1297,7 @@
             inRom = isRomMicroPC(thisPC.microPC());

             if (newMacro) {
-                fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
+                fetchAddr = thisPC.instAddr() & pc_mask;
                 blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
                 pcOffset = 0;
                 curMacroop = NULL;
@@ -1310,7 +1311,7 @@
                 quiesce = true;
                 break;
             }
-        } while ((curMacroop || decoder[tid]->instReady()) &&
+        } while ((curMacroop || dec_ptr->instReady()) &&
                  numInst < fetchWidth &&
                  fetchQueue[tid].size() < fetchQueueSize);

@@ -1341,7 +1342,7 @@

// pipeline a fetch if we're crossing a fetch buffer boundary and not in
     // a state that would preclude fetching
-    fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
+    fetchAddr = (thisPC.instAddr() + pcOffset) & pc_mask;
     Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
     issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
         fetchStatus[tid] != IcacheWaitResponse &&
@@ -1538,7 +1539,7 @@
     }

     Addr pcOffset = fetchOffset[tid];
-    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
+ Addr fetchAddr = (thisPC.instAddr() + pcOffset) & decoder[tid]->pcMask();

     // Align the fetch PC so its at the start of a fetch buffer segment.
     Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 000cc30..c507f70 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -738,11 +738,13 @@
 Tick
 AtomicSimpleCPU::fetchInstMem()
 {
+    auto &decoder = threadInfo[curThread]->thread->decoder;
+
     Packet pkt = Packet(ifetch_req, MemCmd::ReadReq);

     // ifetch_req is initialized to read the instruction
     // directly into the CPU object's inst field.
-    pkt.dataStatic(&inst);
+    pkt.dataStatic(decoder.moreBytesPtr());

     Tick latency = sendPacket(icachePort, &pkt);
     assert(!pkt.isError());
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index e51f5a5..f13019e 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -83,7 +83,6 @@
       branchPred(p.branchPred),
       zeroReg(p.isa[0]->regClasses().at(IntRegClass).zeroReg()),
       traceData(NULL),
-      inst(),
       _status(Idle)
 {
     SimpleThread *thread;
@@ -291,13 +290,14 @@
     SimpleExecContext &t_info = *threadInfo[curThread];
     SimpleThread* thread = t_info.thread;

+    auto &decoder = thread->decoder;
     Addr instAddr = thread->instAddr();
-    Addr fetchPC = (instAddr & PCMask) + t_info.fetchOffset;
+    Addr fetchPC = (instAddr & decoder.pcMask()) + t_info.fetchOffset;

     // set up memory request for instruction fetch
DPRINTF(Fetch, "Fetch: Inst PC:%08p, Fetch PC:%08p\n", instAddr, fetchPC);

-    req->setVirt(fetchPC, sizeof(TheISA::MachInst), Request::INST_FETCH,
+    req->setVirt(fetchPC, decoder.moreBytesSize(), Request::INST_FETCH,
                  instRequestorId(), instAddr);
 }

@@ -321,33 +321,32 @@
     // decode the instruction
     TheISA::PCState pcState = thread->pcState();

+    auto &decoder = thread->decoder;
+
     if (isRomMicroPC(pcState.microPC())) {
         t_info.stayAtPC = false;
-        curStaticInst = thread->decoder.fetchRomMicroop(
+        curStaticInst = decoder.fetchRomMicroop(
                 pcState.microPC(), curMacroStaticInst);
     } else if (!curMacroStaticInst) {
         //We're not in the middle of a macro instruction
         StaticInstPtr instPtr = NULL;

-        TheISA::Decoder *decoder = &(thread->decoder);
-
         //Predecode, ie bundle up an ExtMachInst
         //If more fetch data is needed, pass it in.
-        Addr fetchPC = (pcState.instAddr() & PCMask) + t_info.fetchOffset;
-        //if (decoder->needMoreBytes())
-            decoder->moreBytes(pcState, fetchPC, inst);
-        //else
-        //    decoder->process();
+        Addr fetchPC =
+            (pcState.instAddr() & decoder.pcMask()) + t_info.fetchOffset;
+
+        decoder.moreBytes(pcState, fetchPC);

         //Decode an instruction if one is ready. Otherwise, we'll have to
         //fetch beyond the MachInst at the current pc.
-        instPtr = decoder->decode(pcState);
+        instPtr = decoder.decode(pcState);
         if (instPtr) {
             t_info.stayAtPC = false;
             thread->pcState(pcState);
         } else {
             t_info.stayAtPC = true;
-            t_info.fetchOffset += sizeof(TheISA::MachInst);
+            t_info.fetchOffset += decoder.moreBytesSize();
         }

         //If we decoded an instruction and it's microcoded, start pulling
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 9d921b6..8d56c3b 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -102,7 +102,6 @@
     std::list<ThreadID> activeThreads;

     /** Current instruction */
-    TheISA::MachInst inst;
     StaticInstPtr curStaticInst;
     StaticInstPtr curMacroStaticInst;

diff --git a/src/cpu/simple/noncaching.cc b/src/cpu/simple/noncaching.cc
index 28878e2..ce8bf99 100644
--- a/src/cpu/simple/noncaching.cc
+++ b/src/cpu/simple/noncaching.cc
@@ -88,8 +88,10 @@
     if (bd_it == memBackdoors.end())
         return AtomicSimpleCPU::fetchInstMem();

+    auto &decoder = threadInfo[curThread]->thread->decoder;
+
     auto *bd = bd_it->second;
     Addr offset = ifetch_req->getPaddr() - bd->range().start();
-    memcpy(&inst, bd->ptr() + offset, ifetch_req->getSize());
+ memcpy(decoder.moreBytesPtr(), bd->ptr() + offset, ifetch_req->getSize());
     return 0;
 }
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index c80b16b..2814aed 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -717,11 +717,13 @@
 TimingSimpleCPU::sendFetch(const Fault &fault, const RequestPtr &req,
                            ThreadContext *tc)
 {
+    auto &decoder = threadInfo[curThread]->thread->decoder;
+
     if (fault == NoFault) {
         DPRINTF(SimpleCPU, "Sending fetch for addr %#x(pa: %#x)\n",
                 req->getVaddr(), req->getPaddr());
         ifetch_pkt = new Packet(req, MemCmd::ReadReq);
-        ifetch_pkt->dataStatic(&inst);
+        ifetch_pkt->dataStatic(decoder.moreBytesPtr());
         DPRINTF(SimpleCPU, " -- pkt addr: %#x\n", ifetch_pkt->getAddr());

         if (!icachePort.sendTimingReq(ifetch_pkt)) {

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/40175
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I0642cdb6a61e152441ca4ce47d748639175cda90
Gerrit-Change-Number: 40175
Gerrit-PatchSet: 26
Gerrit-Owner: Gabe Black <gabe.bl...@gmail.com>
Gerrit-Reviewer: Andreas Sandberg <andreas.sandb...@arm.com>
Gerrit-Reviewer: Bobby R. Bruce <bbr...@ucdavis.edu>
Gerrit-Reviewer: Daniel Carvalho <oda...@yahoo.com.br>
Gerrit-Reviewer: Gabe Black <gabe.bl...@gmail.com>
Gerrit-Reviewer: Giacomo Travaglini <giacomo.travagl...@arm.com>
Gerrit-Reviewer: Jason Lowe-Power <ja...@lowepower.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to