Tuan Ta has uploaded this change for review. ( https://gem5-review.googlesource.com/8184

Change subject: cpu: fixed how O3 CPU executes an exit system call
......................................................................

cpu: fixed how O3 CPU executes an exit system call

When a thread executed an exit syscall in SE mode, the thread context
was removed immediately in the same cycle, which left inflight squash
operations and trap event incomplete. The problem happened when a new
thread was assigned to the CPU later. The new thread started with some
incomplete transactions of the previous thread (e.g., squashing). This
problem could cause incorrect execution flow for the new thread (i.e.,
pc was not reset properly at the exit point), deadlock (i.e., some
stage-to-stage signals were not reset) and incorrect rename map between
logical and physical registers.

This patch adds a new state called 'Halting' to the thread context and
defers removing thread context from a CPU until a trap event initiated
by an exit syscall execution is processed. This patch also makes sure
that the removal of a thread context happens after all inflight
transactions of the to-be-removed thread in the pipeline complete.

Change-Id: If7ef1462fb8864e22b45371ee7ae67e2a5ad38b8
---
M src/cpu/o3/commit.hh
M src/cpu/o3/commit_impl.hh
M src/cpu/o3/cpu.cc
M src/cpu/o3/cpu.hh
M src/cpu/o3/decode.hh
M src/cpu/o3/decode_impl.hh
M src/cpu/o3/fetch.hh
M src/cpu/o3/fetch_impl.hh
M src/cpu/o3/iew.hh
M src/cpu/o3/iew_impl.hh
M src/cpu/o3/rename.hh
M src/cpu/o3/rename_impl.hh
M src/cpu/o3/thread_context_impl.hh
M src/cpu/thread_context.hh
M src/sim/eventq.hh
M src/sim/syscall_emul.hh
16 files changed, 266 insertions(+), 37 deletions(-)



diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index f508a37..ff9fbbd 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -193,6 +193,9 @@
     /** Initializes stage by sending back the number of free entries. */
     void startupStage();

+    /** Clear all thread-specific states */
+    void clearStates(ThreadID tid);
+
     /** Initializes the draining of commit. */
     void drain();

diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index d32493c..1c53332 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -383,6 +383,22 @@

 template <class Impl>
 void
+DefaultCommit<Impl>::clearStates(ThreadID tid)
+{
+    commitStatus[tid] = Idle;
+    changedROBNumEntries[tid] = false;
+    checkEmptyROB[tid] = false;
+    trapInFlight[tid] = false;
+    committedStores[tid] = false;
+    trapSquash[tid] = false;
+    tcSquash[tid] = false;
+    pc[tid].set(0);
+    lastCommitedSeqNum[tid] = 0;
+    squashAfterInst[tid] = NULL;
+}
+
+template <class Impl>
+void
 DefaultCommit<Impl>::drain()
 {
     drainPending = true;
@@ -830,6 +846,13 @@
         if (trapSquash[tid]) {
             assert(!tcSquash[tid]);
             squashFromTrap(tid);
+
+            // If the thread is trying to exit (i.e., an exit syscall was
+            // executed), this trapSquash was originated by the exit
+            // syscall earlier. In this case, schedule an exit event in
+            // the next cycle to fully terminate this thread
+            if (cpu->isThreadExiting(tid))
+                cpu->scheduleThreadExitEvent(tid);
         } else if (tcSquash[tid]) {
             assert(commitStatus[tid] != TrapPending);
             squashFromTC(tid);
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index c4bc13f..301d458 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -143,6 +143,8 @@
       dtb(params->dtb),
       tickEvent([this]{ tick(); }, "FullO3CPU tick",
                 false, Event::CPU_Tick_Pri),
+      threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads",
+                false, Event::CPU_Exit_Pri),
 #ifndef NDEBUG
       instcount(0),
 #endif
@@ -791,7 +793,7 @@
 FullO3CPU<Impl>::haltContext(ThreadID tid)
 {
     //For now, this is the same as deallocate
-    DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid);
+    DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating\n", tid);
     assert(!switchedOut());

     deactivateThread(tid);
@@ -868,43 +870,57 @@
     // here to alleviate the case for double-freeing registers
     // in SMT workloads.

-    // Unbind Int Regs from Rename Map
-    for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs;
-         reg_id.index()++) {
-        PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
-        scoreboard.unsetReg(phys_reg);
-        freeList.addReg(phys_reg);
-    }
+//    // Unbind Int Regs from Rename Map
+//    for (RegId reg_id(IntRegClass, 0);
+//         reg_id.index() < TheISA::NumIntRegs;
+//         reg_id.index()++) {
+//        PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
+//        scoreboard.setReg(phys_reg);
+//        freeList.addReg(phys_reg);
+//    }
+//
+//    // Unbind Float Regs from Rename Map
+//    for (RegId reg_id(FloatRegClass, 0);
+//         reg_id.index() < TheISA::NumFloatRegs;
+//         reg_id.index()++) {
+//        PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
+//        scoreboard.setReg(phys_reg);
+//        freeList.addReg(phys_reg);
+//    }
+//
+//    // Unbind condition-code Regs from Rename Map
+//    for (RegId reg_id(CCRegClass, 0);
+//         reg_id.index() < TheISA::NumCCRegs;
+//         reg_id.index()++) {
+//        PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
+//        scoreboard.setReg(phys_reg);
+//        freeList.addReg(phys_reg);
+//    }

-    // Unbind Float Regs from Rename Map
- for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs;
-         reg_id.index()++) {
-        PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
-        scoreboard.unsetReg(phys_reg);
-        freeList.addReg(phys_reg);
-    }
+//    // Squash Throughout Pipeline
+//    DynInstPtr inst = commit.rob->readHeadInst(tid);
+//    InstSeqNum squash_seq_num = inst->seqNum;
+//    fetch.squash(0, squash_seq_num, inst, tid);
+//    decode.squash(tid);
+//    rename.squash(squash_seq_num, tid);
+//    iew.squash(tid);
+//    iew.ldstQueue.squash(squash_seq_num, tid);
+//    commit.rob->squash(squash_seq_num, tid);

-    // Unbind condition-code Regs from Rename Map
-    for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs;
-         reg_id.index()++) {
-        PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
-        scoreboard.unsetReg(phys_reg);
-        freeList.addReg(phys_reg);
-    }
+    // clear all thread-specific states in each stage of the pipeline
+    // since this thread is going to be completely removed from the CPU
+    commit.clearStates(tid);
+    fetch.clearStates(tid);
+    decode.clearStates(tid);
+    rename.clearStates(tid);
+    iew.clearStates(tid);

-    // Squash Throughout Pipeline
-    DynInstPtr inst = commit.rob->readHeadInst(tid);
-    InstSeqNum squash_seq_num = inst->seqNum;
-    fetch.squash(0, squash_seq_num, inst, tid);
-    decode.squash(tid);
-    rename.squash(squash_seq_num, tid);
-    iew.squash(tid);
-    iew.ldstQueue.squash(squash_seq_num, tid);
-    commit.rob->squash(squash_seq_num, tid);
-
-
+    // at this step, all instructions in the pipeline should be already
+    // either committed successfully or squashed. All thread-specific
+    // queues in the pipeline must be empty.
     assert(iew.instQueue.getCount(tid) == 0);
     assert(iew.ldstQueue.getCount(tid) == 0);
+    assert(commit.rob->isEmpty(tid));

     // Reset ROB/IQ/LSQ Entries

@@ -1822,5 +1838,78 @@
     }
 }

+template <class Impl>
+void
+FullO3CPU<Impl>::addThreadToExitingList(ThreadID tid)
+{
+    DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid);
+
+    // make sure the thread is Active
+    assert(std::find(activeThreads.begin(), activeThreads.end(), tid)
+                                              != activeThreads.end());
+
+    // make sure the thread has not been added to the list yet
+    assert(exitingThreads.count(tid) == 0);
+
+    // add the thread to exitingThreads list to mark that this thread is
+    // trying to exit. The boolean value in the pair denotes if a thread is
+ // ready to exit. The thread is not ready to exit until the corresponding + // exit trap event is processed in the future. Until then, it'll be still
+    // an active thread that is trying to exit.
+    exitingThreads.emplace(std::make_pair(tid, false));
+}
+
+template <class Impl>
+bool
+FullO3CPU<Impl>::isThreadExiting(ThreadID tid) const
+{
+    return exitingThreads.count(tid) == 1;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::scheduleThreadExitEvent(ThreadID tid)
+{
+    assert(exitingThreads.count(tid) == 1);
+
+    // exit trap event has been processed. Now, the thread is ready to exit
+    // and be removed from the CPU.
+    exitingThreads[tid] = true;
+
+    // we schedule a threadExitEvent in the next cycle to properly clean
+    // up the thread's states in the pipeline. threadExitEvent has lower
+    // priority than tickEvent, so the cleanup will happen at the very end
+ // of the next cycle after all pipeline stages complete their operations.
+    // We want all stages to complete squashing instructions before doing
+    // the cleanup.
+    if (!threadExitEvent.scheduled()) {
+        schedule(threadExitEvent, nextCycle());
+    }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::exitThreads()
+{
+    // there must be at least one thread trying to exit
+    assert(exitingThreads.size() > 0);
+
+    // terminate all threads that are ready to exit
+    auto it = exitingThreads.begin();
+    while (it != exitingThreads.end()) {
+        ThreadID thread_id = it->first;
+        bool readyToExit = it->second;
+
+        if (readyToExit) {
+            DPRINTF(O3CPU, "Exiting thread %d\n", thread_id);
+            haltContext(thread_id);
+            tcBase(thread_id)->setStatus(ThreadContext::Halted);
+            it = exitingThreads.erase(it);
+        } else {
+            it++;
+        }
+    }
+}
+
 // Forward declaration of FullO3CPU.
 template class FullO3CPU<O3CPUImpl>;
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 10af087..2724467 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -202,6 +202,9 @@
     /** The tick event used for scheduling CPU ticks. */
     EventFunctionWrapper tickEvent;

+    /** The exit event used for terminating all ready-to-exit threads */
+    EventFunctionWrapper threadExitEvent;
+
     /** Schedule tick event, regardless of its current state. */
     void scheduleTickEvent(Cycles delay)
     {
@@ -328,6 +331,21 @@
     void serializeThread(CheckpointOut &cp, ThreadID tid) const override;
     void unserializeThread(CheckpointIn &cp, ThreadID tid) override;

+    /** Insert tid to the list of threads trying to exit */
+    void addThreadToExitingList(ThreadID tid);
+
+    /** Is the thread trying to exit? */
+    bool isThreadExiting(ThreadID tid) const;
+
+    /**
+     *  If a thread is trying to exit and its corresponding trap event
+     *  has been completed, schedule an event to terminate the thread.
+     */
+    void scheduleThreadExitEvent(ThreadID tid);
+
+    /** Terminate all threads that are ready to exit */
+    void exitThreads();
+
   public:
     /** Executes a syscall.
      * @todo: Determine if this needs to be virtual.
@@ -626,6 +644,13 @@
     /** Active Threads List */
     std::list<ThreadID> activeThreads;

+    /**
+     *  This is a list of threads that are trying to exit. Each thread id
+     *  is mapped to a boolean value denoting whether the thread is ready
+     *  to exit.
+     */
+    std::unordered_map<ThreadID, bool> exitingThreads;
+
     /** Integer Register Scoreboard */
     Scoreboard scoreboard;

diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh
index 006219a..4c8e794 100644
--- a/src/cpu/o3/decode.hh
+++ b/src/cpu/o3/decode.hh
@@ -102,6 +102,10 @@
     DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params);

     void startupStage();
+
+    /** Clear all thread-specific states */
+    void clearStates(ThreadID tid);
+
     void resetStage();

     /** Returns the name of decode. */
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 15ec76f..ad0d087 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -86,6 +86,14 @@

 template<class Impl>
 void
+DefaultDecode<Impl>::clearStates(ThreadID tid)
+{
+    decodeStatus[tid] = Idle;
+    stalls[tid].rename = false;
+}
+
+template<class Impl>
+void
 DefaultDecode<Impl>::resetStage()
 {
     _status = Inactive;
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 672fb49..da79e83 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -225,6 +225,9 @@
     /** Initialize stage. */
     void startupStage();

+    /** Clear all thread-specific states*/
+    void clearStates(ThreadID tid);
+
     /** Handles retrying the fetch access. */
     void recvReqRetry();

diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 2e8ec67..9692c7e 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -341,6 +341,26 @@

 template<class Impl>
 void
+DefaultFetch<Impl>::clearStates(ThreadID tid)
+{
+    fetchStatus[tid] = Running;
+    pc[tid] = cpu->pcState(tid);
+    fetchOffset[tid] = 0;
+    macroop[tid] = NULL;
+    delayedCommit[tid] = false;
+    memReq[tid] = NULL;
+    stalls[tid].decode = false;
+    stalls[tid].drain = false;
+    fetchBufferPC[tid] = 0;
+    fetchBufferValid[tid] = false;
+    fetchQueue[tid].clear();
+
+    // TODO not sure what to do with priorityList for now
+    // priorityList.push_back(tid);
+}
+
+template<class Impl>
+void
 DefaultFetch<Impl>::resetStage()
 {
     numInst = 0;
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index de88340..a723cf4 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -147,6 +147,9 @@
/** Initializes stage; sends back the number of free IQ and LSQ entries. */
     void startupStage();

+    /** Clear all thread-specific states */
+    void clearStates(ThreadID tid);
+
     /** Sets main time buffer used for backwards communication. */
     void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);

diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 80d7adc..d4f3766 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -323,6 +323,19 @@

 template<class Impl>
 void
+DefaultIEW<Impl>::clearStates(ThreadID tid)
+{
+    toRename->iewInfo[tid].usedIQ = true;
+    toRename->iewInfo[tid].freeIQEntries =
+        instQueue.numFreeEntries(tid);
+
+    toRename->iewInfo[tid].usedLSQ = true;
+ toRename->iewInfo[tid].freeLQEntries = ldstQueue.numFreeLoadEntries(tid); + toRename->iewInfo[tid].freeSQEntries = ldstQueue.numFreeStoreEntries(tid);
+}
+
+template<class Impl>
+void
 DefaultIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
     timeBuffer = tb_ptr;
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index d0f6ba1..15ed405 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -169,6 +169,9 @@
     /** Initializes variables for the stage. */
     void startupStage();

+    /** Clear all thread-specific states */
+    void clearStates(ThreadID tid);
+
     /** Sets pointer to list of active threads. */
     void setActiveThreads(std::list<ThreadID> *at_ptr);

diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index bc024f6..821f7ff 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -240,6 +240,28 @@

 template <class Impl>
 void
+DefaultRename<Impl>::clearStates(ThreadID tid)
+{
+    renameStatus[tid] = Idle;
+
+    freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid);
+ freeEntries[tid].lqEntries = iew_ptr->ldstQueue.numFreeLoadEntries(tid); + freeEntries[tid].sqEntries = iew_ptr->ldstQueue.numFreeStoreEntries(tid);
+    freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid);
+    emptyROB[tid] = true;
+
+    stalls[tid].iew = false;
+    serializeInst[tid] = NULL;
+
+    instsInProgress[tid] = 0;
+    loadsInProgress[tid] = 0;
+    storesInProgress[tid] = 0;
+
+    serializeOnNextInst[tid] = false;
+}
+
+template <class Impl>
+void
 DefaultRename<Impl>::resetStage()
 {
     _status = Inactive;
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index d9f84fb..4e4be21 100644
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -127,11 +127,18 @@
 {
     DPRINTF(O3CPU, "Calling halt on Thread Context %d\n", threadId());

-    if (thread->status() == ThreadContext::Halted)
+    if (thread->status() == ThreadContext::Halting ||
+        thread->status() == ThreadContext::Halted)
         return;

-    thread->setStatus(ThreadContext::Halted);
-    cpu->haltContext(thread->threadId());
+    // the thread is not going to halt/terminate immediately in this cycle.
+    // The thread will be removed after an exit trap is processed
+    // (e.g., after trapLatency cycles). Until then, the thread's status
+    // will be Halting.
+    thread->setStatus(ThreadContext::Halting);
+
+ // add this thread to the exiting list to mark that it is trying to exit.
+    cpu->addThreadToExitingList(thread->threadId());
 }

 template <class Impl>
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index 1e30649..d6753d8 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -114,6 +114,10 @@
         /// synchronization, etc.
         Suspended,

+        /// Trying to exit and waiting for an event to completely exit.
+        /// Entered when target executes an exit syscall.
+        Halting,
+
         /// Permanently shut down.  Entered when target executes
         /// m5exit pseudo-instruction.  When all contexts enter
         /// this state, the simulation will terminate.
diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh
index 781bcdb..7bbb9f5 100644
--- a/src/sim/eventq.hh
+++ b/src/sim/eventq.hh
@@ -161,6 +161,9 @@
     /// (such as writebacks).
     static const Priority CPU_Tick_Pri =                50;

+    /// If we want to exit a thread in a CPU, it comes after CPU_Tick_Pri
+    static const Priority CPU_Exit_Pri =                64;
+
     /// Statistics events (dump, reset, etc.) come after
     /// everything else, but before exit.
     static const Priority Stat_Event_Pri =              90;
diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh
index eaa5e54..046968c 100644
--- a/src/sim/syscall_emul.hh
+++ b/src/sim/syscall_emul.hh
@@ -2071,5 +2071,4 @@
     return 0;
 }

-
 #endif // __SIM_SYSCALL_EMUL_HH__

--
To view, visit https://gem5-review.googlesource.com/8184
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-MessageType: newchange
Gerrit-Change-Id: If7ef1462fb8864e22b45371ee7ae67e2a5ad38b8
Gerrit-Change-Number: 8184
Gerrit-PatchSet: 1
Gerrit-Owner: Tuan Ta <q...@cornell.edu>
_______________________________________________
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to