[gem5-dev] Change in gem5/gem5[develop]: cpu-o3: Mostly use PCStateBase instead of TheISA::PCState.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52051 )


 (

9 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.

 )Change subject: cpu-o3: Mostly use PCStateBase instead of TheISA::PCState.
..

cpu-o3: Mostly use PCStateBase instead of TheISA::PCState.

There are a few places where TheISA::PCState is still necessary,
specifically when checking if a PC is branching, and also when getting
the nextInstAddr.

It's likely that checking if a PC is branching will become part of the
base PCState interface, but nextInstAddr will likely be removed from the
ThreadContext, ExecContext, etc, interfaces, and then removed from the
interfaces in the O3 which doesn't seem to use them internally.

Change-Id: I499f31d569b9b0c665a745caf612d1e96babf37a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52051
Tested-by: kokoro 
Maintainer: Gabe Black 
Reviewed-by: Daniel Carvalho 
---
M src/cpu/o3/decode.cc
M src/cpu/o3/dyn_inst.cc
M src/cpu/o3/dyn_inst.hh
M src/cpu/o3/cpu.cc
M src/cpu/o3/commit.cc
M src/cpu/o3/commit.hh
M src/cpu/o3/comm.hh
M src/cpu/o3/fetch.cc
M src/cpu/o3/fetch.hh
M src/cpu/o3/iew.cc
10 files changed, 187 insertions(+), 145 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index fc10848..b2da358 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -94,7 +94,7 @@
 DynInstPtr mispredictInst[MaxThreads];
 Addr mispredPC[MaxThreads];
 InstSeqNum squashedSeqNum[MaxThreads];
-TheISA::PCState pc[MaxThreads];
+std::unique_ptr pc[MaxThreads];

 bool squash[MaxThreads];
 bool branchMispredict[MaxThreads];
@@ -114,7 +114,7 @@
 {
 struct DecodeComm
 {
-TheISA::PCState nextPC;
+std::unique_ptr nextPC;
 DynInstPtr mispredictInst;
 DynInstPtr squashInst;
 InstSeqNum doneSeqNum;
@@ -169,7 +169,7 @@
 /// The pc of the next instruction to execute. This is the next
 /// instruction for a branch mispredict, but the same instruction  
for

 /// order violation and the like
-TheISA::PCState pc; // *F
+std::unique_ptr pc; // *F

 /// Provide fetch the instruction that mispredicted, if this
 /// pointer is not-null a misprediction occured
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 421f1e5..01ec0c8 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -120,7 +120,7 @@
 trapSquash[tid] = false;
 tcSquash[tid] = false;
 squashAfterInst[tid] = nullptr;
-pc[tid].set(0);
+pc[tid].reset(params.isa[0]->newPCState());
 youngestSeqNum[tid] = 0;
 lastCommitedSeqNum[tid] = 0;
 trapInFlight[tid] = false;
@@ -340,7 +340,7 @@
 committedStores[tid] = false;
 trapSquash[tid] = false;
 tcSquash[tid] = false;
-pc[tid].set(0);
+pc[tid].reset(cpu->tcBase(tid)->getIsaPtr()->newPCState());
 lastCommitedSeqNum[tid] = 0;
 squashAfterInst[tid] = NULL;
 }
@@ -382,7 +382,7 @@
  *   address mappings. This can happen on for example x86.
  */
 for (ThreadID tid = 0; tid < numThreads; tid++) {
-if (pc[tid].microPC() != 0)
+if (pc[tid]->microPC() != 0)
 return false;
 }

@@ -561,7 +561,7 @@
 toIEW->commitInfo[tid].mispredictInst = NULL;
 toIEW->commitInfo[tid].squashInst = NULL;

-toIEW->commitInfo[tid].pc = pc[tid];
+set(toIEW->commitInfo[tid].pc, pc[tid]);
 }

 void
@@ -569,7 +569,7 @@
 {
 squashAll(tid);

-DPRINTF(Commit, "Squashing from trap, restarting at PC %s\n", pc[tid]);
+DPRINTF(Commit, "Squashing from trap, restarting at PC %s\n",  
*pc[tid]);


 thread[tid]->trapPending = false;
 thread[tid]->noSquashFromTC = false;
@@ -586,7 +586,7 @@
 {
 squashAll(tid);

-DPRINTF(Commit, "Squashing from TC, restarting at PC %s\n", pc[tid]);
+DPRINTF(Commit, "Squashing from TC, restarting at PC %s\n", *pc[tid]);

 thread[tid]->noSquashFromTC = false;
 assert(!thread[tid]->trapPending);
@@ -601,7 +601,7 @@
 Commit::squashFromSquashAfter(ThreadID tid)
 {
 DPRINTF(Commit, "Squashing after squash after request, "
-"restarting at PC %s\n", pc[tid]);
+"restarting at PC %s\n", *pc[tid]);

 squashAll(tid);
 // Make sure to inform the fetch stage of which instruction caused
@@ -843,8 +843,7 @@
 }

 DPRINTF(Commit, "[tid:%i] Redirecting to PC %#x\n",
-tid,
-fromIEW->pc[tid].nextInstAddr());
+tid, *fromIEW->pc[tid]);

 commitStatus[tid] = ROBSquashing;

@@ -884,7 +883,7 @@
 ++stats.branchMispredicts;
 }

-

[gem5-dev] Change in gem5/gem5[develop]: cpu,arch: Eliminate the ThreadContext::nextInstAddr method.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52053 )


 (

9 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.
 )Change subject: cpu,arch: Eliminate the ThreadContext::nextInstAddr  
method.

..

cpu,arch: Eliminate the ThreadContext::nextInstAddr method.

This is no longer used.

Change-Id: I0ec170fb3b450430bbeff0a3c37bcdafe70c92b0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52053
Tested-by: kokoro 
Maintainer: Gabe Black 
Reviewed-by: Daniel Carvalho 
---
M src/cpu/simple_thread.hh
M src/cpu/checker/cpu.hh
M src/cpu/o3/thread_context.hh
M src/cpu/o3/dyn_inst.hh
M src/cpu/thread_context.hh
M src/arch/generic/pcstate.hh
M src/cpu/o3/cpu.cc
M src/cpu/o3/cpu.hh
M src/cpu/o3/commit.hh
M src/cpu/checker/thread_context.hh
M src/arch/arm/fastmodel/iris/thread_context.cc
M src/arch/arm/fastmodel/iris/thread_context.hh
12 files changed, 15 insertions(+), 62 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/arm/fastmodel/iris/thread_context.cc  
b/src/arch/arm/fastmodel/iris/thread_context.cc

index 11e4f17..a6b8c2a 100644
--- a/src/arch/arm/fastmodel/iris/thread_context.cc
+++ b/src/arch/arm/fastmodel/iris/thread_context.cc
@@ -597,12 +597,6 @@
 return pcState().instAddr();
 }

-Addr
-ThreadContext::nextInstAddr() const
-{
-return pcState().nextInstAddr();
-}
-
 RegVal
 ThreadContext::readMiscRegNoEffect(RegIndex misc_reg) const
 {
diff --git a/src/arch/arm/fastmodel/iris/thread_context.hh  
b/src/arch/arm/fastmodel/iris/thread_context.hh

index 8be9281..c5e4cc3 100644
--- a/src/arch/arm/fastmodel/iris/thread_context.hh
+++ b/src/arch/arm/fastmodel/iris/thread_context.hh
@@ -351,7 +351,6 @@
 ArmISA::PCState pcState() const override;
 void pcState(const ArmISA::PCState ) override;
 Addr instAddr() const override;
-Addr nextInstAddr() const override;

 RegVal readMiscRegNoEffect(RegIndex misc_reg) const override;
 RegVal
diff --git a/src/arch/generic/pcstate.hh b/src/arch/generic/pcstate.hh
index a1348a5..10f6618 100644
--- a/src/arch/generic/pcstate.hh
+++ b/src/arch/generic/pcstate.hh
@@ -263,17 +263,6 @@
 MicroPC nupc() const { return _nupc; }
 void nupc(MicroPC val) { _nupc = val; }

-/**
- * Returns the memory address the bytes of the next instruction came  
from.

- *
- * @return Memory address of the next instruction's encoding.
- */
-Addr
-nextInstAddr() const
-{
-return _npc;
-}
-
 // Reset the macroop's upc without advancing the regular pc.
 void
 uReset()
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index a83fe08..7043216 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -363,7 +363,6 @@
 thread->pcState(val);
 }
 Addr instAddr() { return thread->instAddr(); }
-Addr nextInstAddr() { return thread->nextInstAddr(); }
 MicroPC microPC() { return thread->microPC(); }
 //

diff --git a/src/cpu/checker/thread_context.hh  
b/src/cpu/checker/thread_context.hh

index 8618712..62abb21 100644
--- a/src/cpu/checker/thread_context.hh
+++ b/src/cpu/checker/thread_context.hh
@@ -351,9 +351,6 @@
 Addr instAddr() const override { return actualTC->instAddr(); }

 /** Reads this thread's next PC. */
-Addr nextInstAddr() const override { return actualTC->nextInstAddr(); }
-
-/** Reads this thread's next PC. */
 MicroPC microPC() const override { return actualTC->microPC(); }

 RegVal
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 8a005e2..ed462e0 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -314,13 +314,6 @@
 /** Returns the PC of a specific thread. */
 Addr instAddr(ThreadID tid) { return pc[tid]->instAddr(); }

-/** Returns the next PC of a specific thread. */
-Addr
-nextInstAddr(ThreadID tid)
-{
-return pc[tid]->as().nextInstAddr();
-}
-
 /** Reads the micro PC of a specific thread. */
 Addr microPC(ThreadID tid) { return pc[tid]->microPC(); }

diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 05d3a51..69c6ed8 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -1325,12 +1325,6 @@
 return commit.instAddr(tid);
 }

-Addr
-CPU::nextInstAddr(ThreadID tid)
-{
-return commit.nextInstAddr(tid);
-}
-
 MicroPC
 CPU::microPC(ThreadID tid)
 {
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index a2002c0..edb694e 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -396,9 +396,6 @@
 /** Reads the commit micro PC of a specific thread. */
 MicroPC microPC(ThreadID tid);

-/** Reads the next PC of a specific thread. */
-Addr nextInstAddr(ThreadID tid);
-
 /** Initiates a 

[gem5-dev] Change in gem5/gem5[develop]: cpu-minor: Convert the rest of the CPU to use PCStateBase *|&.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52049 )


 (

5 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.
 )Change subject: cpu-minor: Convert the rest of the CPU to use PCStateBase  
*|&.

..

cpu-minor: Convert the rest of the CPU to use PCStateBase *|&.

Change-Id: I528622cd5ad82dbcefe1462401841c6e28359ed3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52049
Tested-by: kokoro 
Reviewed-by: ZHENGRONG WANG 
Maintainer: ZHENGRONG WANG 
---
M src/cpu/minor/decode.cc
M src/cpu/minor/decode.hh
M src/cpu/minor/fetch2.cc
M src/cpu/minor/fetch2.hh
M src/cpu/minor/fetch1.cc
M src/cpu/minor/fetch1.hh
M src/cpu/minor/pipe_data.cc
M src/cpu/minor/pipe_data.hh
8 files changed, 134 insertions(+), 122 deletions(-)

Approvals:
  ZHENGRONG WANG: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/cpu/minor/decode.cc b/src/cpu/minor/decode.cc
index e82811f..5adf2ca 100644
--- a/src/cpu/minor/decode.cc
+++ b/src/cpu/minor/decode.cc
@@ -184,7 +184,7 @@
  * static_inst. */
 static_micro_inst =
 static_inst->fetchMicroop(
-decode_info.microopPC.microPC());
+decode_info.microopPC->microPC());

 output_inst =
 new MinorDynInst(static_micro_inst, inst->id);
@@ -201,19 +201,18 @@

 DPRINTF(Decode, "Microop decomposition inputIndex:"
 " %d output_index: %d lastMicroop: %s microopPC:"
-" %d.%d inst: %d\n",
+" %s inst: %d\n",
 decode_info.inputIndex, output_index,
 (static_micro_inst->isLastMicroop() ?
 "true" : "false"),
-decode_info.microopPC.instAddr(),
-decode_info.microopPC.microPC(),
+*decode_info.microopPC,
 *output_inst);

 /* Acknowledge that the static_inst isn't mine, it's my
  * parent macro-op's */
 parent_static_inst = static_inst;

-static_micro_inst->advancePC(decode_info.microopPC);
+static_micro_inst->advancePC(*decode_info.microopPC);

 /* Step input if this is the last micro-op */
 if (static_micro_inst->isLastMicroop()) {
diff --git a/src/cpu/minor/decode.hh b/src/cpu/minor/decode.hh
index 8e20e8b..6e9cb62 100644
--- a/src/cpu/minor/decode.hh
+++ b/src/cpu/minor/decode.hh
@@ -94,40 +94,35 @@

 struct DecodeThreadInfo
 {
-
-/** Default Constructor */
-DecodeThreadInfo() :
-inputIndex(0),
-inMacroop(false),
-execSeqNum(InstId::firstExecSeqNum),
-blocked(false)
-{ }
+DecodeThreadInfo() {}

 DecodeThreadInfo(const DecodeThreadInfo& other) :
 inputIndex(other.inputIndex),
 inMacroop(other.inMacroop),
 execSeqNum(other.execSeqNum),
 blocked(other.blocked)
-{ }
+{
+set(microopPC, other.microopPC);
+}


 /** Index into the inputBuffer's head marking the start of  
unhandled

  *  instructions */
-unsigned int inputIndex;
+unsigned int inputIndex = 0;

 /** True when we're in the process of decomposing a micro-op and
  *  microopPC will be valid.  This is only the case when there  
isn't
  *  sufficient space in Executes input buffer to take the whole of  
a
  *  decomposed instruction and some of that instructions micro-ops  
must

  *  be generated in a later cycle */
-bool inMacroop;
-TheISA::PCState microopPC;
+bool inMacroop = false;
+std::unique_ptr microopPC;

 /** Source of execSeqNums to number instructions. */
-InstSeqNum execSeqNum;
+InstSeqNum execSeqNum = InstId::firstExecSeqNum;

 /** Blocked indication for report */
-bool blocked;
+bool blocked = false;
 };

 std::vector decodeInfo;
diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
index 193ebaa..72e9957 100644
--- a/src/cpu/minor/fetch1.cc
+++ b/src/cpu/minor/fetch1.cc
@@ -83,6 +83,9 @@
 numFetchesInMemorySystem(0),
 numFetchesInITLB(0)
 {
+for (auto : fetchInfo)
+info.pc.reset(params.isa[0]->newPCState());
+
 if (lineSnap == 0) {
 lineSnap = cpu.cacheLineSize();
 DPRINTF(Fetch, "lineSnap set to cache line size of: %d\n",
@@ -511,8 +514,8 @@
 thread.state = FetchRunning;
 break;
 }
-thread.pc = branch.target;
-

[gem5-dev] Change in gem5/gem5[develop]: cpu-minor: Disentangle the PC and fetch address.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52048 )


 (

6 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.

 )Change subject: cpu-minor: Disentangle the PC and fetch address.
..

cpu-minor: Disentangle the PC and fetch address.

The minor CPU was using a PCState object both to track redirects when
taking a branch, etc, and to track where to fetch a line of memory from.
It would need to create a new PCState object, or at least update the
existing one, whenever it needed to advance to the next line.

This is problematic since it means the minor CPU needs to know how to
create or set a PCState object, and since it by necessity only
understands the most basic aspect of a PCState, what the address is, it
can only set that, with all the other potential attributes staying at
their old values or getting set to some default.

Instead, this change separates the two. There is now a PC which is used
for redirects which the later stages will only pick up if there is a
change in "sequence", the same behavior as before. This PC will only
ever be set when changing sequence, and will otherwise not be
meaningful/useful.

There is also now a separate fetch address which is what the fetch stage
uses to get new lines. This was all the PC value that was artificially
updated was used for anyway.

Change-Id: Ia64bbe21e980566ae77786999689c9c8a94e9378
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52048
Tested-by: kokoro 
Reviewed-by: ZHENGRONG WANG 
Maintainer: ZHENGRONG WANG 
---
M src/cpu/minor/fetch1.cc
M src/cpu/minor/fetch1.hh
M src/cpu/minor/pipe_data.hh
3 files changed, 58 insertions(+), 12 deletions(-)

Approvals:
  ZHENGRONG WANG: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
index 198ed52..193ebaa 100644
--- a/src/cpu/minor/fetch1.cc
+++ b/src/cpu/minor/fetch1.cc
@@ -157,7 +157,7 @@
 /* If line_offset != 0, a request is pushed for the remainder of the
  * line. */
 /* Use a lower, sizeof(MachInst) aligned address for the fetch */
-Addr aligned_pc = thread.pc.instAddr() & ~((Addr) lineSnap - 1);
+Addr aligned_pc = thread.fetchAddr & ~((Addr) lineSnap - 1);
 unsigned int line_offset = aligned_pc % lineSnap;
 unsigned int request_size = maxLineWidth - line_offset;

@@ -166,17 +166,18 @@
 thread.streamSeqNum, thread.predictionSeqNum,
 lineSeqNum);

-FetchRequestPtr request = new FetchRequest(*this, request_id,  
thread.pc);

+FetchRequestPtr request = new FetchRequest(*this, request_id,
+thread.fetchAddr);

 DPRINTF(Fetch, "Inserting fetch into the fetch queue "
 "%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n",
-request_id, aligned_pc, thread.pc, line_offset, request_size);
+request_id, aligned_pc, thread.fetchAddr, line_offset,  
request_size);


 request->request->setContext(cpu.threads[tid]->getTC()->contextId());
 request->request->setVirt(
 aligned_pc, request_size, Request::INST_FETCH,  
cpu.instRequestorId(),

 /* I've no idea why we need the PC, but give it */
-thread.pc.instAddr());
+thread.fetchAddr);

 DPRINTF(Fetch, "Submitting ITLB request\n");
 numFetchesInITLB++;
@@ -200,7 +201,7 @@
 /* Step the PC for the next line onto the line aligned next address.
  * Note that as instructions can span lines, this PC is only a
  * reliable 'new' PC if the next line has a new stream sequence  
number. */

-thread.pc.set(aligned_pc + request_size);
+thread.fetchAddr = aligned_pc + request_size;
 }

 std::ostream &
@@ -511,6 +512,7 @@
 break;
 }
 thread.pc = branch.target;
+thread.fetchAddr = thread.pc.instAddr();
 }

 void
@@ -543,8 +545,10 @@
 line.setFault(response->fault);
 /* Make sequence numbers valid in return */
 line.id = response->id;
-/* Set PC to virtual address */
-line.pc = response->pc;
+/* Set the PC in case there was a sequence change */
+line.pc = thread.pc;
+/* Set fetch address to virtual address */
+line.fetchAddr = response->pc;
 /* Set the lineBase, which is a sizeof(MachInst) aligned address <=
  *  pc.instAddr() */
 line.lineBaseAddr = response->request->getVaddr();
@@ -712,6 +716,7 @@
 ThreadContext *thread_ctx = cpu.getContext(tid);
 Fetch1ThreadInfo  = fetchInfo[tid];
 thread.pc = thread_ctx->pcState();
+thread.fetchAddr = thread.pc.instAddr();
 thread.state = FetchRunning;
 thread.wakeupGuard = true;
 DPRINTF(Fetch, "[tid:%d]: Changing stream wakeup %s\n",
diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh
index dcdf35e..434fbba 100644
--- a/src/cpu/minor/fetch1.hh
+++ b/src/cpu/minor/fetch1.hh
@@ -139,7 +139,7 @@
  

[gem5-dev] Change in gem5/gem5[develop]: cpu: Reimplement pcState(Addr) without using the PCState constructor.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52054 )


Change subject: cpu: Reimplement pcState(Addr) without using the PCState  
constructor.

..

cpu: Reimplement pcState(Addr) without using the PCState constructor.

Use the BaseISA::newPCState(Addr) method instead, so that we don't need
to know how to build PCState objects for the given ISA.

Because the pcState() accessor still takes a const reference to a
PCState, we still need to use the TheISA::PCState type to call it. In
the future this will also take a PCStatePtr, so that use will go away.

Change-Id: I8f2f66b58c342e8c455d438047857c0119566b2b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52054
Reviewed-by: Daniel Carvalho 
Maintainer: Gabe Black 
Tested-by: kokoro 
---
M src/cpu/thread_context.hh
1 file changed, 25 insertions(+), 1 deletion(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index 1f04e3c..2fd22ff 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -226,7 +226,11 @@
 virtual TheISA::PCState pcState() const = 0;

 virtual void pcState(const TheISA::PCState ) = 0;
-void pcState(Addr addr) { pcState(TheISA::PCState(addr)); }
+void
+pcState(Addr addr)
+{
+pcState(getIsaPtr()->newPCState(addr)->as());
+}

 void
 setNPC(Addr val)

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52054
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I8f2f66b58c342e8c455d438047857c0119566b2b
Gerrit-Change-Number: 52054
Gerrit-PatchSet: 14
Gerrit-Owner: Gabe Black 
Gerrit-Reviewer: Daniel Carvalho 
Gerrit-Reviewer: Earl Ou 
Gerrit-Reviewer: Gabe Black 
Gerrit-Reviewer: Jason Lowe-Power 
Gerrit-Reviewer: kokoro 
Gerrit-MessageType: merged
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: cpu: Stop using the ThreadContext::nextInstAddr method.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52052 )


Change subject: cpu: Stop using the ThreadContext::nextInstAddr method.
..

cpu: Stop using the ThreadContext::nextInstAddr method.

The PCState already contains this information internally, and it can be
compared, printed, etc, implicitly alongside all the other info in the
PCState, everywhere this method was being used.

Change-Id: I30705f30a135d4ffbc3279c366dafb1184445c70
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52052
Reviewed-by: Daniel Carvalho 
Maintainer: Gabe Black 
Tested-by: kokoro 
---
M src/cpu/checker/cpu_impl.hh
M src/arch/arm/nativetrace.cc
M src/cpu/o3/iew.cc
3 files changed, 27 insertions(+), 17 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/arm/nativetrace.cc b/src/arch/arm/nativetrace.cc
index 304f4f6..3cafcf7 100644
--- a/src/arch/arm/nativetrace.cc
+++ b/src/arch/arm/nativetrace.cc
@@ -142,7 +142,7 @@
 ThreadContext *tc = record->getThread();
 // This area is read only on the target. It can't stop there to tell us
 // what's going on, so we should skip over anything there also.
-if (tc->nextInstAddr() > 0x)
+if (tc->pcState().npc() > 0x)
 return;
 nState.update(this);
 mState.update(tc);
diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh
index 9d5c260..436ef96 100644
--- a/src/cpu/checker/cpu_impl.hh
+++ b/src/cpu/checker/cpu_impl.hh
@@ -518,10 +518,9 @@
 }
 }

-if (inst->nextInstAddr() != thread->nextInstAddr()) {
-warn("%lli: Instruction next PCs do not match! Inst: %#x, "
- "checker: %#x",
- curTick(), inst->nextInstAddr(), thread->nextInstAddr());
+if (inst->pcState() != thread->pcState()) {
+warn("%lli: Instruction PCs do not match! Inst: %s, checker: %s",
+ curTick(), inst->pcState(), thread->pcState());
 handleError(inst);
 }

@@ -644,10 +643,9 @@
 Checker::dumpAndExit(const DynInstPtr )
 {
 cprintf("Error detected, instruction information:\n");
-cprintf("PC:%s, nextPC:%#x\n[sn:%lli]\n[tid:%i]\n"
+cprintf("PC:%s\n[sn:%lli]\n[tid:%i]\n"
 "Completed:%i\n",
 inst->pcState(),
-inst->nextInstAddr(),
 inst->seqNum,
 inst->threadNumber,
 inst->isCompleted());
diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc
index 79e0783..2795919 100644
--- a/src/cpu/o3/iew.cc
+++ b/src/cpu/o3/iew.cc
@@ -1602,17 +1602,12 @@

 DPRINTF(IEW, "[tid:%i] [sn:%llu] Execute: "
 "Branch mispredict detected.\n",
-tid,inst->seqNum);
-DPRINTF(IEW, "[tid:%i] [sn:%llu] Predicted target "
-"was PC:%#x, NPC:%#x\n",
-tid,inst->seqNum,
-inst->predInstAddr(), inst->predNextInstAddr());
+tid, inst->seqNum);
+DPRINTF(IEW, "[tid:%i] [sn:%llu] Predicted target was  
PC: %s\n",

+tid, inst->seqNum, inst->readPredTarg());
 DPRINTF(IEW, "[tid:%i] [sn:%llu] Execute: "
-"Redirecting fetch to PC: %#x, "
-"NPC: %#x.\n",
-tid,inst->seqNum,
-inst->nextInstAddr(),
-inst->nextInstAddr());
+"Redirecting fetch to PC: %s\n",
+tid, inst->seqNum, inst->pcState());
 // If incorrect, then signal the ROB that it must be squashed.
 squashDueToBranch(inst, tid);


--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52052
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I30705f30a135d4ffbc3279c366dafb1184445c70
Gerrit-Change-Number: 52052
Gerrit-PatchSet: 14
Gerrit-Owner: Gabe Black 
Gerrit-Reviewer: Daniel Carvalho 
Gerrit-Reviewer: Gabe Black 
Gerrit-Reviewer: Jason Lowe-Power 
Gerrit-Reviewer: kokoro 
Gerrit-MessageType: merged
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: cpu-simple: Use PCStateBase instead of TheISA::PCState.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52050 )


Change subject: cpu-simple: Use PCStateBase instead of TheISA::PCState.
..

cpu-simple: Use PCStateBase instead of TheISA::PCState.

There are still occurrances of TheISA::PCState, but these are just for
compatibility with other interfaces.

Change-Id: I5538f1483608625221aab7f87a0d7d3ee5488b64
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52050
Reviewed-by: Daniel Carvalho 
Maintainer: Gabe Black 
Tested-by: kokoro 
---
M src/cpu/simple/exec_context.hh
M src/cpu/simple/base.cc
2 files changed, 22 insertions(+), 6 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 8854e9a..38caaf5 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -369,9 +369,10 @@
 // Use a fake sequence number since we only have one
 // instruction in flight at the same time.
 const InstSeqNum cur_sn(0);
-t_info.predPC = thread->pcState();
+set(t_info.predPC, thread->pcState());
 const bool predict_taken(
-branchPred->predict(curStaticInst, cur_sn, t_info.predPC,
+branchPred->predict(curStaticInst, cur_sn,
+t_info.predPC->as(),
 curThread));

 if (predict_taken)
@@ -386,8 +387,7 @@

 assert(curStaticInst);

-TheISA::PCState pc = threadContexts[curThread]->pcState();
-Addr instAddr = pc.instAddr();
+Addr instAddr = threadContexts[curThread]->pcState().instAddr();

 if (curStaticInst->isMemRef()) {
 t_info.execContextStats.numMemRefs++;
@@ -484,7 +484,7 @@
 // instruction in flight at the same time.
 const InstSeqNum cur_sn(0);

-if (t_info.predPC == thread->pcState()) {
+if (t_info.predPC->as() == thread->pcState()) {
 // Correctly predicted branch
 branchPred->update(cur_sn, curThread);
 } else {
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 53b5735..d652873 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -70,7 +70,7 @@
 bool stayAtPC;

 // Branch prediction
-TheISA::PCState predPC;
+std::unique_ptr predPC;

 /** PER-THREAD STATS */
 Counter numInst;

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52050
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I5538f1483608625221aab7f87a0d7d3ee5488b64
Gerrit-Change-Number: 52050
Gerrit-PatchSet: 14
Gerrit-Owner: Gabe Black 
Gerrit-Reviewer: Daniel Carvalho 
Gerrit-Reviewer: Earl Ou 
Gerrit-Reviewer: Gabe Black 
Gerrit-Reviewer: Jason Lowe-Power 
Gerrit-Reviewer: kokoro 
Gerrit-MessageType: merged
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: arch: Virtualize printing a PCState.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52039 )


 (

6 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.

 )Change subject: arch: Virtualize printing a PCState.
..

arch: Virtualize printing a PCState.

Introduce a virtual output() method which prints the current PCState to
the given output stream, and define an overload for << to use that
method. This will make it possible to print a PCState without knowing
what the actual type is.

Change-Id: I663619b168c0b2b90c148b45ae16d77f03934e5b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52039
Tested-by: kokoro 
Reviewed-by: Bobby R. Bruce 
Maintainer: Bobby R. Bruce 
---
M src/arch/generic/pcstate.hh
1 file changed, 53 insertions(+), 35 deletions(-)

Approvals:
  Bobby R. Bruce: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/generic/pcstate.hh b/src/arch/generic/pcstate.hh
index 284797d..d4a317e 100644
--- a/src/arch/generic/pcstate.hh
+++ b/src/arch/generic/pcstate.hh
@@ -80,6 +80,8 @@

 virtual PCStateBase *clone() const = 0;

+virtual void output(std::ostream ) const = 0;
+
 virtual bool
 equals(const PCStateBase ) const
 {
@@ -123,6 +125,13 @@
 }
 };

+static inline std::ostream &
+operator<<(std::ostream & os, const PCStateBase )
+{
+pc.output(os);
+return os;
+}
+
 static inline bool
 operator==(const PCStateBase , const PCStateBase )
 {
@@ -189,6 +198,12 @@
 npc(val);
 }

+void
+output(std::ostream ) const override
+{
+ccprintf(os, "(%#x=>%#x)", this->pc(), this->npc());
+}
+
 bool
 equals(const PCStateBase ) const override
 {
@@ -268,14 +283,6 @@
 }
 };

-template 
-std::ostream &
-operator<<(std::ostream & os, const SimplePCState )
-{
-ccprintf(os, "(%#x=>%#x)", pc.pc(), pc.npc());
-return os;
-}
-
 // A PC and microcode PC.
 template 
 class UPCState : public SimplePCState
@@ -284,6 +291,13 @@
 typedef SimplePCState Base;

   public:
+void
+output(std::ostream ) const override
+{
+Base::output(os);
+ccprintf(os, ".(%d=>%d)", this->upc(), this->nupc());
+}
+
 PCStateBase *
 clone() const override
 {
@@ -328,15 +342,6 @@
 }
 };

-template 
-std::ostream &
-operator<<(std::ostream & os, const UPCState )
-{
-ccprintf(os, "(%#x=>%#x).(%d=>%d)",
-pc.pc(), pc.npc(), pc.upc(), pc.nupc());
-return os;
-}
-
 // A PC with a delay slot.
 template 
 class DelaySlotPCState : public SimplePCState
@@ -347,6 +352,12 @@
 Addr _nnpc;

   public:
+void
+output(std::ostream ) const override
+{
+ccprintf(os, "(%#x=>%#x=>%#x)", this->pc(), this->npc(), nnpc());
+}
+
 PCStateBase *
 clone() const override
 {
@@ -409,15 +420,6 @@
 }
 };

-template 
-std::ostream &
-operator<<(std::ostream & os, const DelaySlotPCState )
-{
-ccprintf(os, "(%#x=>%#x=>%#x)",
-pc.pc(), pc.npc(), pc.nnpc());
-return os;
-}
-
 // A PC with a delay slot and a microcode PC.
 template 
 class DelaySlotUPCState : public DelaySlotPCState
@@ -426,6 +428,13 @@
 typedef DelaySlotPCState Base;

   public:
+void
+output(std::ostream ) const override
+{
+Base::output(os);
+ccprintf(os, ".(%d=>%d)", this->upc(), this->nupc());
+}
+
 PCStateBase *
 clone() const override
 {
@@ -469,15 +478,6 @@
 }
 };

-template 
-std::ostream &
-operator<<(std::ostream & os, const DelaySlotUPCState )
-{
-ccprintf(os, "(%#x=>%#x=>%#x).(%d=>%d)",
-pc.pc(), pc.npc(), pc.nnpc(), pc.upc(), pc.nupc());
-return os;
-}
-
 }

 } // namespace gem5

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52039
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I663619b168c0b2b90c148b45ae16d77f03934e5b
Gerrit-Change-Number: 52039
Gerrit-PatchSet: 14
Gerrit-Owner: Gabe Black 
Gerrit-Reviewer: Bobby R. Bruce 
Gerrit-Reviewer: Daniel Carvalho 
Gerrit-Reviewer: Gabe Black 
Gerrit-Reviewer: kokoro 
Gerrit-MessageType: merged
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: arch: Add a newPCState method to the ISA class.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52040 )


Change subject: arch: Add a newPCState method to the ISA class.
..

arch: Add a newPCState method to the ISA class.

This method is the seed which creates a new PCState object of the
appropriate type. It can be used to initialize PCStateBase *s so that they
always point to something valid and can be manipulated without having to
first check if there's something there, as opposed to the alternative
where a pointer might be null until it's first pointed at something.

Change-Id: If06ee633846603acbfd2432f3d8bac6746a8b729
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52040
Reviewed-by: Daniel Carvalho 
Maintainer: Gabe Black 
Tested-by: kokoro 
---
M src/arch/generic/isa.hh
M src/arch/arm/fastmodel/iris/isa.hh
M src/arch/riscv/isa.hh
M src/arch/sparc/isa.hh
M src/arch/power/isa.hh
M src/arch/arm/isa.hh
M src/arch/x86/isa.hh
M src/arch/mips/isa.hh
8 files changed, 69 insertions(+), 1 deletion(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/arm/fastmodel/iris/isa.hh  
b/src/arch/arm/fastmodel/iris/isa.hh

index 8caa994..f4f3b97 100644
--- a/src/arch/arm/fastmodel/iris/isa.hh
+++ b/src/arch/arm/fastmodel/iris/isa.hh
@@ -52,6 +52,12 @@
 ArmISA::CPSR cpsr = tc->readMiscRegNoEffect(ArmISA::MISCREG_CPSR);
 return ArmISA::inUserMode(cpsr);
 }
+
+PCStateBase *
+newPCState(Addr new_inst_addr=0) const override
+{
+return new ArmISA::PCState(new_inst_addr);
+}
 };

 } // namespace Iris
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index a4758d2..2d30ded 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -43,6 +43,7 @@

 #include "arch/arm/isa_device.hh"
 #include "arch/arm/mmu.hh"
+#include "arch/arm/pcstate.hh"
 #include "arch/arm/regs/int.hh"
 #include "arch/arm/regs/misc.hh"
 #include "arch/arm/self_debug.hh"
@@ -868,6 +869,12 @@

 void setupThreadContext();

+PCStateBase *
+newPCState(Addr new_inst_addr=0) const override
+{
+return new PCState(new_inst_addr);
+}
+
 void takeOverFrom(ThreadContext *new_tc,
   ThreadContext *old_tc) override;

diff --git a/src/arch/generic/isa.hh b/src/arch/generic/isa.hh
index ba047df..a49b17a 100644
--- a/src/arch/generic/isa.hh
+++ b/src/arch/generic/isa.hh
@@ -42,6 +42,7 @@

 #include 

+#include "arch/generic/pcstate.hh"
 #include "cpu/reg_class.hh"
 #include "mem/packet.hh"
 #include "mem/request.hh"
@@ -66,6 +67,7 @@
 RegClasses _regClasses;

   public:
+virtual PCStateBase *newPCState(Addr new_inst_addr=0) const = 0;
 virtual void takeOverFrom(ThreadContext *new_tc, ThreadContext  
*old_tc) {}

 virtual void setThreadContext(ThreadContext *_tc) { tc = _tc; }

diff --git a/src/arch/mips/isa.hh b/src/arch/mips/isa.hh
index 915039f..c7fbac7 100644
--- a/src/arch/mips/isa.hh
+++ b/src/arch/mips/isa.hh
@@ -34,6 +34,7 @@
 #include 

 #include "arch/generic/isa.hh"
+#include "arch/mips/pcstate.hh"
 #include "arch/mips/regs/misc.hh"
 #include "arch/mips/types.hh"
 #include "base/types.hh"
@@ -78,6 +79,12 @@
   public:
 void clear();

+PCStateBase *
+newPCState(Addr new_inst_addr=0) const override
+{
+return new PCState(new_inst_addr);
+}
+
   public:
 void configCP();

diff --git a/src/arch/power/isa.hh b/src/arch/power/isa.hh
index d563ebe..6c42f32 100644
--- a/src/arch/power/isa.hh
+++ b/src/arch/power/isa.hh
@@ -32,6 +32,7 @@
 #define __ARCH_POWER_ISA_HH__

 #include "arch/generic/isa.hh"
+#include "arch/power/pcstate.hh"
 #include "arch/power/regs/misc.hh"
 #include "arch/power/types.hh"
 #include "base/logging.hh"
@@ -58,6 +59,12 @@
   public:
 void clear() {}

+PCStateBase *
+newPCState(Addr new_inst_addr=0) const override
+{
+return new PCState(new_inst_addr);
+}
+
   public:
 RegVal
 readMiscRegNoEffect(int misc_reg) const
diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh
index 4521c52..6435b74 100644
--- a/src/arch/riscv/isa.hh
+++ b/src/arch/riscv/isa.hh
@@ -37,6 +37,7 @@
 #include 

 #include "arch/generic/isa.hh"
+#include "arch/riscv/pcstate.hh"
 #include "arch/riscv/types.hh"
 #include "base/types.hh"

@@ -76,6 +77,12 @@

 void clear();

+PCStateBase *
+newPCState(Addr new_inst_addr=0) const override
+{
+return new PCState(new_inst_addr);
+}
+
   public:
 RegVal readMiscRegNoEffect(int misc_reg) const;
 RegVal readMiscReg(int misc_reg);
diff --git a/src/arch/sparc/isa.hh b/src/arch/sparc/isa.hh
index 4d2b15c..f26de40 100644
--- a/src/arch/sparc/isa.hh
+++ b/src/arch/sparc/isa.hh
@@ -33,6 +33,7 @@
 #include 

 #include "arch/generic/isa.hh"
+#include 

[gem5-dev] Change in gem5/gem5[develop]: arch,cpu: Use PCStateBase in StaticInst::branchTarget

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52045 )


 (

9 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.

 )Change subject: arch,cpu: Use PCStateBase in StaticInst::branchTarget
..

arch,cpu: Use PCStateBase in StaticInst::branchTarget

Change-Id: I1b8a2ea088b52252601968b1b1083ed712a5bfd6
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52045
Tested-by: kokoro 
Maintainer: Gabe Black 
Reviewed-by: Daniel Carvalho 
---
M src/cpu/o3/decode.cc
M src/arch/power/insts/branch.cc
M src/arch/power/insts/branch.hh
M src/arch/x86/insts/microop.cc
M src/arch/x86/insts/microop.hh
M src/cpu/o3/dyn_inst.hh
M src/arch/arm/isa/templates/branch.isa
M src/cpu/nop_static_inst.cc
M src/cpu/static_inst.cc
M src/cpu/static_inst.hh
M src/arch/arm/isa/insts/branch.isa
M src/arch/arm/insts/branch64.cc
M src/arch/arm/insts/branch64.hh
M src/arch/x86/isa/microops/regop.isa
M src/arch/x86/isa/microops/seqop.isa
M src/arch/mips/isa/formats/branch.isa
M src/arch/riscv/isa/formats/standard.isa
17 files changed, 137 insertions(+), 105 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/arm/insts/branch64.cc b/src/arch/arm/insts/branch64.cc
index b8a22fc..4c9552d 100644
--- a/src/arch/arm/insts/branch64.cc
+++ b/src/arch/arm/insts/branch64.cc
@@ -43,31 +43,34 @@
 namespace ArmISA
 {

-ArmISA::PCState
-BranchImm64::branchTarget(const ArmISA::PCState ) const
+std::unique_ptr
+BranchImm64::branchTarget(const PCStateBase _pc) const
 {
-ArmISA::PCState pcs = branchPC;
-pcs.instNPC(pcs.pc() + imm);
-pcs.advance();
-return pcs;
+PCStateBase *pcs = branch_pc.clone();
+auto  = pcs->as();
+apc.instNPC(apc.pc() + imm);
+apc.advance();
+return std::unique_ptr{pcs};
 }

-ArmISA::PCState
-BranchImmReg64::branchTarget(const ArmISA::PCState ) const
+std::unique_ptr
+BranchImmReg64::branchTarget(const PCStateBase _pc) const
 {
-ArmISA::PCState pcs = branchPC;
-pcs.instNPC(pcs.pc() + imm);
-pcs.advance();
-return pcs;
+PCStateBase *pcs = branch_pc.clone();
+auto  = pcs->as();
+apc.instNPC(apc.pc() + imm);
+apc.advance();
+return std::unique_ptr{pcs};
 }

-ArmISA::PCState
-BranchImmImmReg64::branchTarget(const ArmISA::PCState ) const
+std::unique_ptr
+BranchImmImmReg64::branchTarget(const PCStateBase _pc) const
 {
-ArmISA::PCState pcs = branchPC;
-pcs.instNPC(pcs.pc() + imm2);
-pcs.advance();
-return pcs;
+PCStateBase *pcs = branch_pc.clone();
+auto  = pcs->as();
+apc.instNPC(apc.pc() + imm2);
+apc.advance();
+return std::unique_ptr{pcs};
 }

 std::string
diff --git a/src/arch/arm/insts/branch64.hh b/src/arch/arm/insts/branch64.hh
index 10483a6..551ade7 100644
--- a/src/arch/arm/insts/branch64.hh
+++ b/src/arch/arm/insts/branch64.hh
@@ -57,8 +57,8 @@
 ArmStaticInst(mnem, _machInst, __opClass), imm(_imm)
 {}

-ArmISA::PCState branchTarget(
-const ArmISA::PCState ) const override;
+std::unique_ptr branchTarget(
+const PCStateBase _pc) const override;

 /// Explicitly import the otherwise hidden branchTarget
 using StaticInst::branchTarget;
@@ -180,8 +180,8 @@
 ArmStaticInst(mnem, _machInst, __opClass), imm(_imm), op1(_op1)
 {}

-ArmISA::PCState branchTarget(
-const ArmISA::PCState ) const override;
+std::unique_ptr branchTarget(
+const PCStateBase _pc) const override;

 /// Explicitly import the otherwise hidden branchTarget
 using StaticInst::branchTarget;
@@ -206,8 +206,8 @@
 imm1(_imm1), imm2(_imm2), op1(_op1)
 {}

-ArmISA::PCState branchTarget(
-const ArmISA::PCState ) const override;
+std::unique_ptr branchTarget(
+const PCStateBase _pc) const override;

 /// Explicitly import the otherwise hidden branchTarget
 using StaticInst::branchTarget;
diff --git a/src/arch/arm/isa/insts/branch.isa  
b/src/arch/arm/isa/insts/branch.isa

index 91826a2..fd48bad 100644
--- a/src/arch/arm/isa/insts/branch.isa
+++ b/src/arch/arm/isa/insts/branch.isa
@@ -46,7 +46,7 @@
 bCode = '''
 NPC = (uint32_t)(PC + imm);
 '''
-br_tgt_code = '''pcs.instNPC((uint32_t)(branchPC.instPC() +  
imm));'''

+br_tgt_code = '''pcs.instNPC((uint32_t)(pcs.instPC() + imm));'''
 instFlags = ["IsDirectControl"]
 if (link):
 bCode += '''
@@ -86,8 +86,8 @@
 # of the current ISA. Thumb is whether the target is ARM.
 newPC = '(uint32_t)(Thumb ? (roundDown(PC, 4) + imm) : (PC +  
imm))'

 br_tgt_code = '''
-pcs.instNPC((uint32_t)(branchPC.thumb() ?  
(roundDown(branchPC.instPC(),4) + imm) :

-

[gem5-dev] Change in gem5/gem5[develop]: arch,cpu: Use PCStateBase in buildRetPC.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52044 )


 (

4 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.

 )Change subject: arch,cpu: Use PCStateBase in buildRetPC.
..

arch,cpu: Use PCStateBase in buildRetPC.

Change-Id: I9d9f5b25613440737b1d67134fcae09aa68baf8b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52044
Tested-by: kokoro 
Maintainer: Gabe Black 
Reviewed-by: Daniel Carvalho 
---
M src/cpu/pred/bpred_unit.cc
M src/arch/arm/insts/static_inst.hh
M src/arch/x86/insts/static_inst.hh
M src/arch/power/insts/static_inst.hh
M src/cpu/static_inst.hh
M src/arch/mips/isa/base.isa
M src/arch/riscv/insts/static_inst.hh
M src/arch/sparc/insts/static_inst.hh
8 files changed, 59 insertions(+), 35 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/arm/insts/static_inst.hh  
b/src/arch/arm/insts/static_inst.hh

index 3979a49..07181c2 100644
--- a/src/arch/arm/insts/static_inst.hh
+++ b/src/arch/arm/insts/static_inst.hh
@@ -205,12 +205,13 @@

 uint64_t getEMI() const override { return machInst; }

-PCState
-buildRetPC(const PCState , const PCState ) const override
+std::unique_ptr
+buildRetPC(const PCStateBase _pc,
+const PCStateBase _pc) const override
 {
-PCState retPC = callPC;
-retPC.uEnd();
-return retPC;
+PCStateBase *ret_pc = call_pc.clone();
+ret_pc->as().uEnd();
+return std::unique_ptr{ret_pc};
 }

 std::string generateDisassembly(
diff --git a/src/arch/mips/isa/base.isa b/src/arch/mips/isa/base.isa
index b02a8c4..cd13de2 100644
--- a/src/arch/mips/isa/base.isa
+++ b/src/arch/mips/isa/base.isa
@@ -64,13 +64,18 @@
 pc.as().advance();
 }

-PCState
-buildRetPC(const PCState , const PCState ) const  
override

+std::unique_ptr
+buildRetPC(const PCStateBase _pc,
+const PCStateBase _pc) const override
 {
-PCState ret = callPC;
+PCStateBase *ret_pc = call_pc.clone();
+
+auto  = ret_pc->as();
+
 ret.advance();
-ret.pc(curPC.npc());
-return ret;
+ret.pc(cur_pc.as().npc());
+
+return std::unique_ptr{ret_pc};
 }

 size_t
diff --git a/src/arch/power/insts/static_inst.hh  
b/src/arch/power/insts/static_inst.hh

index bbe93fe..eaa0100 100644
--- a/src/arch/power/insts/static_inst.hh
+++ b/src/arch/power/insts/static_inst.hh
@@ -74,12 +74,13 @@
 pc_state.as().advance();
 }

-PCState
-buildRetPC(const PCState , const PCState ) const override
+std::unique_ptr
+buildRetPC(const PCStateBase _pc,
+const PCStateBase _pc) const override
 {
-PCState retPC = callPC;
-retPC.advance();
-return retPC;
+PCStateBase *ret_pc = call_pc.clone();
+ret_pc->as().advance();
+return std::unique_ptr{ret_pc};
 }

 size_t
diff --git a/src/arch/riscv/insts/static_inst.hh  
b/src/arch/riscv/insts/static_inst.hh

index 1e2ec28..ed180ed 100644
--- a/src/arch/riscv/insts/static_inst.hh
+++ b/src/arch/riscv/insts/static_inst.hh
@@ -64,13 +64,15 @@
 pc.as().advance();
 }

-PCState
-buildRetPC(const PCState , const PCState ) const override
+std::unique_ptr
+buildRetPC(const PCStateBase _pc,
+const PCStateBase _pc) const override
 {
-PCState retPC = callPC;
-retPC.advance();
-retPC.pc(curPC.npc());
-return retPC;
+PCStateBase *ret_pc_ptr = call_pc.clone();
+auto _pc = ret_pc_ptr->as();
+ret_pc.advance();
+ret_pc.pc(cur_pc.as().npc());
+return std::unique_ptr{ret_pc_ptr};
 }

 size_t
diff --git a/src/arch/sparc/insts/static_inst.hh  
b/src/arch/sparc/insts/static_inst.hh

index b2dfd5e..c5cec25 100644
--- a/src/arch/sparc/insts/static_inst.hh
+++ b/src/arch/sparc/insts/static_inst.hh
@@ -122,13 +122,15 @@
 return simpleAsBytes(buf, size, machInst);
 }

-PCState
-buildRetPC(const PCState , const PCState ) const override
+std::unique_ptr
+buildRetPC(const PCStateBase _pc,
+const PCStateBase _pc) const override
 {
-PCState ret = callPC;
+PCStateBase *ret_ptr = call_pc.clone();
+auto  = ret_ptr->as();
 ret.uEnd();
-ret.pc(curPC.npc());
-return ret;
+ret.pc(cur_pc.as().npc());
+return std::unique_ptr{ret_ptr};
 }
 };

diff --git a/src/arch/x86/insts/static_inst.hh  
b/src/arch/x86/insts/static_inst.hh

index 1654e81..d890904 100644
--- a/src/arch/x86/insts/static_inst.hh
+++ b/src/arch/x86/insts/static_inst.hh
@@ -204,12 +204,13 @@
   

[gem5-dev] Change in gem5/gem5[develop]: arch,cpu,sim: Use PCState * and & to trace and not TheISA::PCState.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52042 )


 (

4 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.
 )Change subject: arch,cpu,sim: Use PCState * and & to trace and not  
TheISA::PCState.

..

arch,cpu,sim: Use PCState * and & to trace and not TheISA::PCState.

Change-Id: Ia31919ef19f973aa7af973889366412f3999342a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52042
Tested-by: kokoro 
Maintainer: Gabe Black 
Reviewed-by: Daniel Carvalho 
---
M src/arch/arm/tracers/tarmac_record_v8.hh
M src/cpu/inst_pb_trace.cc
M src/cpu/inst_pb_trace.hh
M src/cpu/exetrace.cc
M src/cpu/exetrace.hh
M src/sim/insttracer.hh
M src/cpu/inteltrace.cc
M src/cpu/inteltrace.hh
M src/cpu/nativetrace.hh
M src/arch/arm/tracers/tarmac_record.cc
M src/arch/arm/tracers/tarmac_record.hh
M src/arch/arm/tracers/tarmac_base.cc
M src/arch/arm/tracers/tarmac_base.hh
M src/arch/arm/tracers/tarmac_parser.cc
M src/arch/arm/tracers/tarmac_parser.hh
M src/arch/arm/tracers/tarmac_tracer.cc
M src/arch/arm/tracers/tarmac_tracer.hh
17 files changed, 104 insertions(+), 83 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/arm/tracers/tarmac_base.cc  
b/src/arch/arm/tracers/tarmac_base.cc

index d0f0bcd..c8a8619 100644
--- a/src/arch/arm/tracers/tarmac_base.cc
+++ b/src/arch/arm/tracers/tarmac_base.cc
@@ -54,7 +54,7 @@

 TarmacBaseRecord::TarmacBaseRecord(Tick _when, ThreadContext *_thread,
const StaticInstPtr _staticInst,
-   PCState _pc,
+   const PCStateBase &_pc,
const StaticInstPtr _macroStaticInst)
 : InstRecord(_when, _thread, _staticInst, _pc, _macroStaticInst)
 {
@@ -62,7 +62,7 @@

 TarmacBaseRecord::InstEntry::InstEntry(
 ThreadContext* thread,
-PCState pc,
+const PCStateBase ,
 const StaticInstPtr staticInst,
 bool predicate)
 : taken(predicate) ,
@@ -83,7 +83,7 @@
   [](char& c) { c = toupper(c); });
 }

-TarmacBaseRecord::RegEntry::RegEntry(PCState pc)
+TarmacBaseRecord::RegEntry::RegEntry(const PCStateBase )
   : isetstate(pcToISetState(pc)),
 values(2, 0)
 {
@@ -100,15 +100,16 @@
 }

 TarmacBaseRecord::ISetState
-TarmacBaseRecord::pcToISetState(PCState pc)
+TarmacBaseRecord::pcToISetState(const PCStateBase )
 {
+auto  = pc.as();
 TarmacBaseRecord::ISetState isetstate;

-if (pc.aarch64())
+if (apc.aarch64())
 isetstate = TarmacBaseRecord::ISET_A64;
-else if (!pc.thumb() && !pc.jazelle())
+else if (!apc.thumb() && !apc.jazelle())
 isetstate = TarmacBaseRecord::ISET_ARM;
-else if (pc.thumb() && !pc.jazelle())
+else if (apc.thumb() && !apc.jazelle())
 isetstate = TarmacBaseRecord::ISET_THUMB;
 else
 // No Jazelle state in TARMAC
diff --git a/src/arch/arm/tracers/tarmac_base.hh  
b/src/arch/arm/tracers/tarmac_base.hh

index a29eb74..cbf87a3 100644
--- a/src/arch/arm/tracers/tarmac_base.hh
+++ b/src/arch/arm/tracers/tarmac_base.hh
@@ -85,7 +85,7 @@
 {
 InstEntry() = default;
 InstEntry(ThreadContext* thread,
-  ArmISA::PCState pc,
+  const PCStateBase ,
   const StaticInstPtr staticInst,
   bool predicate);

@@ -109,7 +109,7 @@
 };

 RegEntry() = default;
-RegEntry(ArmISA::PCState pc);
+RegEntry(const PCStateBase );

 RegType type;
 RegIndex index;
@@ -130,8 +130,8 @@

   public:
 TarmacBaseRecord(Tick _when, ThreadContext *_thread,
- const StaticInstPtr _staticInst, ArmISA::PCState _pc,
- const StaticInstPtr _macroStaticInst = NULL);
+ const StaticInstPtr _staticInst, const PCStateBase  
&_pc,

+ const StaticInstPtr _macroStaticInst=nullptr);

 virtual void dump() = 0;

@@ -142,7 +142,7 @@
  * @param pc program counter (PCState) variable
  * @return Instruction Set State for the given PCState
  */
-static ISetState pcToISetState(ArmISA::PCState pc);
+static ISetState pcToISetState(const PCStateBase );
 };


diff --git a/src/arch/arm/tracers/tarmac_parser.cc  
b/src/arch/arm/tracers/tarmac_parser.cc

index e955314..45a6332 100644
--- a/src/arch/arm/tracers/tarmac_parser.cc
+++ b/src/arch/arm/tracers/tarmac_parser.cc
@@ -918,7 +918,7 @@

 if (!same) {
 if (!mismatch) {
-TarmacParserRecord::printMismatchHeader(inst, pc);
+TarmacParserRecord::printMismatchHeader(inst, *pc);
 mismatch = true;
 }
 outs << "diff> [" << it->repr << "] gem5: 0x" << 

[gem5-dev] Change in gem5/gem5[develop]: arch: Add a virtually backed PCState == operator.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52038 )


Change subject: arch: Add a virtually backed PCState == operator.
..

arch: Add a virtually backed PCState == operator.

Define a == operator and an equals() virtual method for the PCStateBase
class which it calls. The equals method will compare the owning PCState
instance with another instance of the same PCState class.

Change-Id: Ia704f1bc9e1217ce191671fe574c226ee1b73278
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52038
Maintainer: Gabe Black 
Tested-by: kokoro 
Reviewed-by: Earl Ou 
Reviewed-by: Daniel Carvalho 
---
M src/arch/generic/pcstate.hh
M src/arch/arm/pcstate.hh
2 files changed, 46 insertions(+), 55 deletions(-)

Approvals:
  Earl Ou: Looks good to me, approved
  Daniel Carvalho: Looks good to me, but someone else must approve
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/arm/pcstate.hh b/src/arch/arm/pcstate.hh
index d2f3cef..b1d0b74 100644
--- a/src/arch/arm/pcstate.hh
+++ b/src/arch/arm/pcstate.hh
@@ -376,9 +376,10 @@
 }

 bool
-operator == (const PCState ) const
+equals(const PCStateBase ) const override
 {
-return Base::operator == (opc) &&
+auto  = other.as();
+return Base::equals(other) &&
 flags == opc.flags && nextFlags == opc.nextFlags &&
 _itstate == opc._itstate &&
 _nextItstate == opc._nextItstate &&
@@ -387,12 +388,6 @@
 _stepped == opc._stepped;
 }

-bool
-operator != (const PCState ) const
-{
-return !(*this == opc);
-}
-
 void
 serialize(CheckpointOut ) const override
 {
diff --git a/src/arch/generic/pcstate.hh b/src/arch/generic/pcstate.hh
index 32ee206..284797d 100644
--- a/src/arch/generic/pcstate.hh
+++ b/src/arch/generic/pcstate.hh
@@ -80,6 +80,12 @@

 virtual PCStateBase *clone() const = 0;

+virtual bool
+equals(const PCStateBase ) const
+{
+return _pc == other._pc && _upc == other._upc;
+}
+
 /**
  * Returns the memory address of the instruction this PC points to.
  *
@@ -117,6 +123,18 @@
 }
 };

+static inline bool
+operator==(const PCStateBase , const PCStateBase )
+{
+return a.equals(b);
+}
+
+static inline bool
+operator!=(const PCStateBase , const PCStateBase )
+{
+return !a.equals(b);
+}
+
 namespace GenericISA
 {

@@ -172,15 +190,11 @@
 }

 bool
-operator == (const PCStateCommon ) const
+equals(const PCStateBase ) const override
 {
-return _pc == opc._pc && _npc == opc._npc;
-}
-
-bool
-operator != (const PCStateCommon ) const
-{
-return !(*this == opc);
+auto  = other.as();
+return PCStateBase::equals(other) &&
+_npc == ps._npc && _nupc == ps._nupc;
 }

 void
@@ -312,19 +326,6 @@
 this->upc(0);
 this->nupc(1);
 }
-
-bool
-operator == (const UPCState ) const
-{
-return this->pc() == opc.pc() && this->npc() == opc.npc() &&
-   this->upc() == opc.upc() && this->nupc() == opc.nupc();
-}
-
-bool
-operator != (const UPCState ) const
-{
-return !(*this == opc);
-}
 };

 template 
@@ -387,17 +388,10 @@
 }

 bool
-operator == (const DelaySlotPCState ) const
+equals(const PCStateBase ) const override
 {
-return this->_pc == opc._pc &&
-   this->_npc == opc._npc &&
-   this->_nnpc == opc._nnpc;
-}
-
-bool
-operator != (const DelaySlotPCState ) const
-{
-return !(*this == opc);
+auto  = other.as>();
+return Base::equals(other) && ps._nnpc == this->_nnpc;
 }

 void
@@ -473,22 +467,6 @@
 this->_upc = 0;
 this->_nupc = 1;
 }
-
-bool
-operator == (const DelaySlotUPCState ) const
-{
-return this->_pc == opc._pc &&
-   this->_npc == opc._npc &&
-   this->_nnpc == opc._nnpc &&
-   this->_upc == opc._upc &&
-   this->_nupc == opc._nupc;
-}
-
-bool
-operator != (const DelaySlotUPCState ) const
-{
-return !(*this == opc);
-}
 };

 template 

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52038
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ia704f1bc9e1217ce191671fe574c226ee1b73278
Gerrit-Change-Number: 52038
Gerrit-PatchSet: 14
Gerrit-Owner: Gabe Black 
Gerrit-Reviewer: Daniel Carvalho 
Gerrit-Reviewer: Earl Ou 
Gerrit-Reviewer: Gabe Black 
Gerrit-Reviewer: kokoro 
Gerrit-MessageType: merged
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org

[gem5-dev] Change in gem5/gem5[develop]: cpu-minor: Use PCStateBase in the minor CPU DynInst class.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52047 )


 (

5 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.

 )Change subject: cpu-minor: Use PCStateBase in the minor CPU DynInst class.
..

cpu-minor: Use PCStateBase in the minor CPU DynInst class.

Change-Id: I43d538568d473e27cdbfe6ea77c317b18cfdf18f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52047
Tested-by: kokoro 
Reviewed-by: ZHENGRONG WANG 
Maintainer: ZHENGRONG WANG 
---
M src/cpu/minor/execute.cc
M src/cpu/minor/decode.cc
M src/cpu/minor/fetch2.cc
M src/cpu/minor/dyn_inst.cc
M src/cpu/minor/dyn_inst.hh
M src/cpu/minor/exec_context.hh
M src/cpu/minor/lsq.cc
7 files changed, 63 insertions(+), 55 deletions(-)

Approvals:
  ZHENGRONG WANG: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/cpu/minor/decode.cc b/src/cpu/minor/decode.cc
index ab908e0..e82811f 100644
--- a/src/cpu/minor/decode.cc
+++ b/src/cpu/minor/decode.cc
@@ -115,7 +115,7 @@
 {
 inst->traceData = cpu.getTracer()->getInstRecord(curTick(),
 cpu.getContext(inst->id.threadId),
-inst->staticInst, inst->pc, static_inst);
+inst->staticInst, *inst->pc, static_inst);

 /* Use the execSeqNum as the fetch sequence number as this most closely
  *  matches the other processor models' idea of fetch sequence */
@@ -176,7 +176,7 @@

 /* Set up PC for the next micro-op emitted */
 if (!decode_info.inMacroop) {
-decode_info.microopPC = inst->pc;
+set(decode_info.microopPC, *inst->pc);
 decode_info.inMacroop = true;
 }

@@ -188,14 +188,15 @@

 output_inst =
 new MinorDynInst(static_micro_inst, inst->id);
-output_inst->pc = decode_info.microopPC;
+set(output_inst->pc, decode_info.microopPC);
 output_inst->fault = NoFault;

 /* Allow a predicted next address only on the last
  *  microop */
 if (static_micro_inst->isLastMicroop()) {
 output_inst->predictedTaken = inst->predictedTaken;
-output_inst->predictedTarget =  
inst->predictedTarget;

+set(output_inst->predictedTarget,
+inst->predictedTarget);
 }

 DPRINTF(Decode, "Microop decomposition inputIndex:"
diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc
index 5a4ef37..8a07647 100644
--- a/src/cpu/minor/dyn_inst.cc
+++ b/src/cpu/minor/dyn_inst.cc
@@ -120,7 +120,7 @@
 operator <<(std::ostream , const MinorDynInst )
 {
 os << inst.id << " pc: 0x"
-<< std::hex << inst.pc.instAddr() << std::dec << " (";
+<< std::hex << inst.pc->instAddr() << std::dec << " (";

 if (inst.isFault())
 os << "fault: \"" << inst.fault->name() << '"';
@@ -180,7 +180,7 @@
 {
 if (isFault()) {
 minorInst(named_object, "id=F;%s addr=0x%x fault=\"%s\"\n",
-id, pc.instAddr(), fault->name());
+id, pc->instAddr(), fault->name());
 } else {
 unsigned int num_src_regs = staticInst->numSrcRegs();
 unsigned int num_dest_regs = staticInst->numDestRegs();
@@ -222,7 +222,7 @@

 minorInst(named_object, "id=%s addr=0x%x inst=\"%s\" class=%s"
 " flags=\"%s\"%s%s\n",
-id, pc.instAddr(),
+id, pc->instAddr(),
 (staticInst->opClass() == No_OpClass ?
 "(invalid)" : staticInst->disassemble(0,NULL)),
 enums::OpClassStrings[staticInst->opClass()],
diff --git a/src/cpu/minor/dyn_inst.hh b/src/cpu/minor/dyn_inst.hh
index d71ccec..96a1649 100644
--- a/src/cpu/minor/dyn_inst.hh
+++ b/src/cpu/minor/dyn_inst.hh
@@ -173,64 +173,64 @@
 InstId id;

 /** Trace information for this instruction's execution */
-Trace::InstRecord *traceData;
+Trace::InstRecord *traceData = nullptr;

 /** The fetch address of this instruction */
-TheISA::PCState pc;
+std::unique_ptr pc;

 /** This is actually a fault masquerading as an instruction */
 Fault fault;

 /** Tried to predict the destination of this inst (if a control
  *  instruction or a sys call) */
-bool triedToPredict;
+bool triedToPredict = false;

 /** This instruction was predicted to change control flow and
  *  the following instructions will have a newer predictionSeqNum */
-bool predictedTaken;
+bool predictedTaken = false;

 /** Predicted branch target */
-TheISA::PCState predictedTarget;
+std::unique_ptr predictedTarget;

 /** Fields only set during execution */


[gem5-dev] Change in gem5/gem5[develop]: arch: Add a "set" function to set one PCState to another.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52041 )


 (

3 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.

 )Change subject: arch: Add a "set" function to set one PCState to another.
..

arch: Add a "set" function to set one PCState to another.

Most of the time, the type pointed to by a PCState pointer or reference
will be the same as all the others, if not nullptr.

This change adds a set of "set" functions which assume that the
underlying type of each pointer or reference are the same, and handles
casting, copying things over, creating a new copy, etc, for you. It uses
a new "update" virtual method on PCState subclasses which casts the
source to the same type as the destination and copies values over.

Note that the "set" function doesn't actually verify that the two types
are the same, just like the overloaded ==, != and << operators. In the
future, those checks can be added for debugging purposes, probably
guarded by a configuration variable which can be toggled on or off to
get better performance or more thorough error checking.

The main advantage of these wrappers are that they allows consistent
semantics whether your moving a value from a pointer, or from a yet
unconverted PCState subclass, or vice versa, which will be particularly
helpful while transitioning between using raw PCState instances and
using primarily pointers and references.

This change also adds wrappers which handle std::unique_ptr, which makes
it easier to use them as arguments to these functions. Otherwise, if the
std::unique_ptr is a temporary value, using the return value of .get()
will let the std::unique_ptr go out of scope, making it delete the data
pointed to by the returned pointed. By keeping the std::unique_ptr
around on the stack, that prevents it from going out of scope.

Change-Id: I2c737b08e0590a2c46e212a7b9efa543bdb81ad3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52041
Tested-by: kokoro 
Maintainer: Gabe Black 
Reviewed-by: Daniel Carvalho 
---
M src/arch/x86/pcstate.hh
M src/arch/generic/pcstate.hh
M src/arch/arm/pcstate.hh
M src/arch/power/pcstate.hh
M src/arch/riscv/pcstate.hh
5 files changed, 188 insertions(+), 0 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/arm/pcstate.hh b/src/arch/arm/pcstate.hh
index b1d0b74..122cd9b 100644
--- a/src/arch/arm/pcstate.hh
+++ b/src/arch/arm/pcstate.hh
@@ -111,6 +111,21 @@

 PCStateBase *clone() const override { return new PCState(*this); }

+void
+update(const PCStateBase ) override
+{
+Base::update(other);
+auto  = other.as();
+flags = pcstate.flags;
+nextFlags = pcstate.nextFlags;
+_itstate = pcstate._itstate;
+_nextItstate = pcstate._nextItstate;
+_size = pcstate._size;
+_illegalExec = pcstate._illegalExec;
+_debugStep = pcstate._debugStep;
+_stepped = pcstate._stepped;
+}
+
 bool
 illegalExec() const
 {
diff --git a/src/arch/generic/pcstate.hh b/src/arch/generic/pcstate.hh
index d4a317e..a1348a5 100644
--- a/src/arch/generic/pcstate.hh
+++ b/src/arch/generic/pcstate.hh
@@ -42,7 +42,10 @@
 #define __ARCH_GENERIC_TYPES_HH__

 #include 
+#include 
+#include 

+#include "base/compiler.hh"
 #include "base/trace.hh"
 #include "base/types.hh"
 #include "sim/serialize.hh"
@@ -79,6 +82,13 @@
 }

 virtual PCStateBase *clone() const = 0;
+virtual void
+update(const PCStateBase )
+{
+_pc = other._pc;
+_upc = other._upc;
+}
+void update(const PCStateBase *ptr) { update(*ptr); }

 virtual void output(std::ostream ) const = 0;

@@ -144,6 +154,86 @@
 return !a.equals(b);
 }

+namespace
+{
+
+inline void
+set(PCStateBase *, const PCStateBase *src)
+{
+if (GEM5_LIKELY(dest)) {
+if (GEM5_LIKELY(src)) {
+// Both src and dest already have storage, so just copy  
contents.

+dest->update(src);
+} else {
+// src is empty, so clear out dest.
+dest = nullptr;
+}
+} else {
+if (GEM5_LIKELY(src)) {
+// dest doesn't have storage, so create some as a copy of src.
+dest = src->clone();
+} else {
+// dest is already nullptr, so nothing to do.
+}
+}
+}
+
+inline void
+set(std::unique_ptr , const PCStateBase *src)
+{
+PCStateBase *dest_ptr = dest.get();
+set(dest_ptr, src);
+if (dest.get() != dest_ptr)
+dest.reset(dest_ptr);
+}
+
+inline void
+set(PCStateBase *, const std::unique_ptr )
+{
+const PCStateBase *src_ptr = src.get();
+set(dest, src_ptr);
+}
+
+inline void
+set(std::unique_ptr ,
+const std::unique_ptr )
+{
+PCStateBase 

[gem5-dev] Change in gem5/gem5[develop]: arch,cpu,sim: Use PCStateBase in advancePC.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52043 )


 (

4 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.

 )Change subject: arch,cpu,sim: Use PCStateBase in advancePC.
..

arch,cpu,sim: Use PCStateBase in advancePC.

By using a PCStateBase pointer or reference, we can (mostly) avoid
having to know what the ISA specific PState class is, letting the ISA
specific instruction classes cast to the type they need internally.

There are a couple minor places where we need to do those casts outside
of ISA specific types, one in the generic NopStaticInstPtr class, and a
few in generic faults.

Right now, we'll just use the TheISA::PCState type in those isolated
spots (sometimes hidden by auto), and deal with it later, possibly
with a virtual "advance" method of some sort.

Change-Id: I774c67dc648a85556230f601e087211b3d5630a9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52043
Tested-by: kokoro 
Maintainer: Gabe Black 
Reviewed-by: Daniel Carvalho 
---
M src/arch/arm/insts/static_inst.hh
M src/arch/x86/insts/microop.hh
M src/arch/x86/insts/static_inst.hh
M src/arch/power/insts/static_inst.hh
M src/cpu/nop_static_inst.cc
M src/cpu/static_inst.hh
M src/arch/mips/isa/base.isa
M src/arch/arm/insts/macromem.hh
M src/arch/riscv/insts/static_inst.cc
M src/arch/riscv/insts/static_inst.hh
M src/arch/sparc/insts/micro.hh
M src/arch/arm/insts/mem.hh
M src/arch/arm/insts/mem64.hh
M src/arch/arm/insts/pred_inst.hh
M src/arch/arm/insts/vfp.hh
M src/arch/sparc/insts/static_inst.cc
M src/arch/sparc/insts/static_inst.hh
17 files changed, 100 insertions(+), 48 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Gabe Black: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh
index 8541131..1dd69ea 100644
--- a/src/arch/arm/insts/macromem.hh
+++ b/src/arch/arm/insts/macromem.hh
@@ -42,6 +42,7 @@
 #define __ARCH_ARM_MACROMEM_HH__

 #include "arch/arm/insts/pred_inst.hh"
+#include "arch/arm/pcstate.hh"
 #include "arch/arm/tlb.hh"

 namespace gem5
@@ -75,14 +76,15 @@

   public:
 void
-advancePC(PCState ) const override
+advancePC(PCStateBase ) const override
 {
+auto  = pcState.as();
 if (flags[IsLastMicroop]) {
-pcState.uEnd();
+apc.uEnd();
 } else if (flags[IsMicroop]) {
-pcState.uAdvance();
+apc.uAdvance();
 } else {
-pcState.advance();
+apc.advance();
 }
 }
 };
@@ -96,14 +98,15 @@

   public:
 void
-advancePC(PCState ) const override
+advancePC(PCStateBase ) const override
 {
+auto  = pcState.as();
 if (flags[IsLastMicroop]) {
-pcState.uEnd();
+apc.uEnd();
 } else if (flags[IsMicroop]) {
-pcState.uAdvance();
+apc.uAdvance();
 } else {
-pcState.advance();
+apc.advance();
 }
 }
 };
diff --git a/src/arch/arm/insts/mem.hh b/src/arch/arm/insts/mem.hh
index 7bee981..4a2c8a0 100644
--- a/src/arch/arm/insts/mem.hh
+++ b/src/arch/arm/insts/mem.hh
@@ -42,6 +42,7 @@
 #define __ARCH_ARM_MEM_HH__

 #include "arch/arm/insts/pred_inst.hh"
+#include "arch/arm/pcstate.hh"

 namespace gem5
 {
@@ -57,14 +58,15 @@
 {}

 void
-advancePC(PCState ) const override
+advancePC(PCStateBase ) const override
 {
+auto  = pcState.as();
 if (flags[IsLastMicroop]) {
-pcState.uEnd();
+apc.uEnd();
 } else if (flags[IsMicroop]) {
-pcState.uAdvance();
+apc.uAdvance();
 } else {
-pcState.advance();
+apc.advance();
 }
 }
 };
diff --git a/src/arch/arm/insts/mem64.hh b/src/arch/arm/insts/mem64.hh
index 7fb168c..e2dd5dd 100644
--- a/src/arch/arm/insts/mem64.hh
+++ b/src/arch/arm/insts/mem64.hh
@@ -40,6 +40,7 @@

 #include "arch/arm/insts/misc64.hh"
 #include "arch/arm/insts/static_inst.hh"
+#include "arch/arm/pcstate.hh"

 namespace gem5
 {
@@ -75,14 +76,15 @@
 {}

 void
-advancePC(PCState ) const override
+advancePC(PCStateBase ) const override
 {
+auto  = pcState.as();
 if (flags[IsLastMicroop]) {
-pcState.uEnd();
+apc.uEnd();
 } else if (flags[IsMicroop]) {
-pcState.uAdvance();
+apc.uAdvance();
 } else {
-pcState.advance();
+apc.advance();
 }
 }
 };
diff --git a/src/arch/arm/insts/pred_inst.hh  
b/src/arch/arm/insts/pred_inst.hh

index 2e35622..1f62614 100644
--- a/src/arch/arm/insts/pred_inst.hh
+++ b/src/arch/arm/insts/pred_inst.hh
@@ -42,6 +42,7 @@
 #define __ARCH_ARM_INSTS_PREDINST_HH__

 #include "arch/arm/insts/static_inst.hh"

[gem5-dev] Change in gem5/gem5[develop]: misc: Use a PCStateBase unique_ptr in SkipFuncBase::process.

2021-11-22 Thread Gabe Black (Gerrit) via gem5-dev
Gabe Black has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52046 )


 (

5 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.
 )Change subject: misc: Use a PCStateBase unique_ptr in  
SkipFuncBase::process.

..

misc: Use a PCStateBase unique_ptr in SkipFuncBase::process.

Change-Id: I0ffcf3abeaa7704a3b6eaccfd967a9654f59c741
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52046
Tested-by: kokoro 
Maintainer: Bobby R. Bruce 
Reviewed-by: Daniel Carvalho 
---
M src/kern/system_events.cc
1 file changed, 15 insertions(+), 2 deletions(-)

Approvals:
  Daniel Carvalho: Looks good to me, approved
  Bobby R. Bruce: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/kern/system_events.cc b/src/kern/system_events.cc
index 816f158..19d7cb1 100644
--- a/src/kern/system_events.cc
+++ b/src/kern/system_events.cc
@@ -39,12 +39,12 @@
 void
 SkipFuncBase::process(ThreadContext *tc)
 {
-[[maybe_unused]] TheISA::PCState oldPC = tc->pcState();
+std::unique_ptr old_pc(tc->pcState().clone());

 returnFromFuncIn(tc);

 DPRINTF(PCEvent, "skipping %s: pc = %s, newpc = %s\n", description,
-oldPC, tc->pcState());
+*old_pc, tc->pcState());
 }

 } // namespace gem5

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52046
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I0ffcf3abeaa7704a3b6eaccfd967a9654f59c741
Gerrit-Change-Number: 52046
Gerrit-PatchSet: 14
Gerrit-Owner: Gabe Black 
Gerrit-Reviewer: Bobby R. Bruce 
Gerrit-Reviewer: Daniel Carvalho 
Gerrit-Reviewer: Gabe Black 
Gerrit-Reviewer: Jason Lowe-Power 
Gerrit-Reviewer: kokoro 
Gerrit-MessageType: merged
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: mem: Support stats for Cache hitLatency

2021-11-22 Thread Huang Jiasen (Gerrit) via gem5-dev
Huang Jiasen has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53103 )



Change subject: mem: Support stats for Cache hitLatency
..

mem: Support stats for Cache hitLatency

Change-Id: I18de75d3b6c4ce1784b90653e2d132ffecf1b1af
---
M src/mem/xbar.cc
M src/mem/cache/base.cc
M src/mem/cache/base.hh
M src/mem/coherent_xbar.cc
M src/mem/packet.hh
M src/mem/port.hh
6 files changed, 115 insertions(+), 7 deletions(-)



diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index f97c30a..fb450ef 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -112,6 +112,8 @@
   system(p.system),
   stats(*this)
 {
+cache_type_ = getCacheType(p.name);
+
 // the MSHR queue has no reserve entries as we check the MSHR
 // queue on every single allocation, whereas the write queue has
 // as many reserve entries as we have MSHRs, since every MSHR may
@@ -225,8 +227,24 @@
 {
 if (pkt->needsResponse()) {
 // These delays should have been consumed by now
-assert(pkt->headerDelay == 0);
-assert(pkt->payloadDelay == 0);
+// assert(pkt->headerDelay == 0);
+// assert(pkt->payloadDelay == 0);
+
+pkt->headerDelay = request_time - curTick();
+if (!cache_type_.compare("L2C")) {
+pkt->probeUpXBar = true;
+uint32_t orig_pkt_header_delay = pkt->headerDelay;
+cpuSidePort.sendFunctionalSnoop(pkt);
+DPRINTF(Cache, "BaseCache::%s pkt is %s "
+"orig pkt->headerDelay = %dC "
+"updated pkt->headerDelay = %dC\n",
+__func__,
+pkt->print(),
+ticksToCycles(orig_pkt_header_delay),
+ticksToCycles(pkt->headerDelay));
+stats.cmdStats(pkt).hitLatency[pkt->req->requestorId()] +=
+ticksToCycles(pkt->headerDelay);
+}

 pkt->makeTimingResponse();

@@ -388,6 +406,11 @@
 blk->clearPrefetched();
 }

+incHitCount(pkt);
+if (cache_type_.compare("L2C")) {
+stats.cmdStats(pkt).hitLatency[pkt->req->requestorId()] += lat;
+}
+
 handleTimingReqHit(pkt, blk, request_time);
 } else {
 handleTimingReqMiss(pkt, blk, forward_time, request_time);
@@ -1292,7 +1315,6 @@

 updateBlockData(blk, pkt, has_old_data);
 DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
-incHitCount(pkt);

 // When the packet metadata arrives, the tag lookup will be done  
while
 // the payload is arriving. Then the block will be ready to access  
as

@@ -1368,8 +1390,6 @@
 updateBlockData(blk, pkt, has_old_data);
 DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());

-incHitCount(pkt);
-
 // When the packet metadata arrives, the tag lookup will be done  
while
 // the payload is arriving. Then the block will be ready to access  
as

 // soon as the fill is done
@@ -1381,8 +1401,6 @@
 } else if (blk && (pkt->needsWritable() ?
 blk->isSet(CacheBlk::WritableBit) :
 blk->isSet(CacheBlk::ReadableBit))) {
-// OK to satisfy access
-incHitCount(pkt);

 // Calculate access latency based on the need to access the data  
array

 if (pkt->isRead()) {
@@ -1954,6 +1972,8 @@
("number of " + name + " hits").c_str()),
   ADD_STAT(misses, statistics::units::Count::get(),
("number of " + name + " misses").c_str()),
+  ADD_STAT(hitLatency, statistics::units::Tick::get(),
+   ("number of " + name + " hit ticks").c_str()),
   ADD_STAT(missLatency, statistics::units::Tick::get(),
("number of " + name + " miss ticks").c_str()),
   ADD_STAT(accesses, statistics::units::Count::get(),
@@ -2010,6 +2030,15 @@
 misses.subname(i, system->getRequestorName(i));
 }

+// Hit latency statistics
+hitLatency
+.init(max_requestors)
+.flags(total | nozero | nonan)
+;
+for (int i = 0; i < max_requestors; i++) {
+hitLatency.subname(i, system->getRequestorName(i));
+}
+
 // Miss latency statistics
 missLatency
 .init(max_requestors)
@@ -2116,6 +2145,10 @@
  "number of demand (read+write) hits"),
 ADD_STAT(overallHits, statistics::units::Count::get(),
  "number of overall hits"),
+ADD_STAT(demandHitLatency, statistics::units::Tick::get(),
+ "number of demand (read+write) hit ticks"),
+ADD_STAT(overallHitLatency, statistics::units::Tick::get(),
+"number of overall hit ticks"),
 ADD_STAT(demandMisses, statistics::units::Count::get(),
  "number of demand (read+write) misses"),
 ADD_STAT(overallMisses, 

[gem5-dev] Change in gem5/gem5[develop]: stdlib: Fix CustomResource metadata setting

2021-11-22 Thread Bobby R. Bruce (Gerrit) via gem5-dev
Bobby R. Bruce has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53086 )



Change subject: stdlib: Fix CustomResource metadata setting
..

stdlib: Fix CustomResource metadata setting

The 'metadata' parameter was not being passed to the AbstractResource
superclass.

Change-Id: Id25d82baa2039c992645e6807a46e7c329520bb7
---
M src/python/gem5/resources/resource.py
1 file changed, 13 insertions(+), 1 deletion(-)



diff --git a/src/python/gem5/resources/resource.py  
b/src/python/gem5/resources/resource.py

index 3203191..0b9b128 100644
--- a/src/python/gem5/resources/resource.py
+++ b/src/python/gem5/resources/resource.py
@@ -78,7 +78,7 @@
 :param local_path: The path of the resource on the host system.
 :param metadata: Add metadata for the custom resource.
 """
-super().__init__(local_path=local_path)
+super().__init__(local_path=local_path, metadata=metadata)


 class Resource(AbstractResource):

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53086
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Id25d82baa2039c992645e6807a46e7c329520bb7
Gerrit-Change-Number: 53086
Gerrit-PatchSet: 1
Gerrit-Owner: Bobby R. Bruce 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: stdlib: Update the LupvBoard to use KernelDiskWorkload

2021-11-22 Thread Bobby R. Bruce (Gerrit) via gem5-dev
Bobby R. Bruce has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53085 )



Change subject: stdlib: Update the LupvBoard to use KernelDiskWorkload
..

stdlib: Update the LupvBoard to use KernelDiskWorkload

Change-Id: I5857f70e6ca61b8916792e634d20cdf827b21bd0
---
M src/python/gem5/components/boards/experimental/lupv_board.py
1 file changed, 40 insertions(+), 54 deletions(-)



diff --git a/src/python/gem5/components/boards/experimental/lupv_board.py  
b/src/python/gem5/components/boards/experimental/lupv_board.py

index 76e857e..e583389 100644
--- a/src/python/gem5/components/boards/experimental/lupv_board.py
+++ b/src/python/gem5/components/boards/experimental/lupv_board.py
@@ -25,13 +25,15 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import os
-from typing import Optional, List
+from typing import List

 from utils.override import overrides
 from ..abstract_board import AbstractBoard
 from ...processors.abstract_processor import AbstractProcessor
 from ...memory.abstract_memory_system import AbstractMemorySystem
 from ...cachehierarchies.abstract_cache_hierarchy import  
AbstractCacheHierarchy

+from ..kernel_disk_workload import KernelDiskWorkload
+from resources.resource import AbstractResource
 from isas import ISA
 from utils.requires import requires

@@ -56,8 +58,6 @@
 LupioSYS,
 LupV,
 AddrRange,
-CowDiskImage,
-RawDiskImage,
 Frequency,
 Port,
 )
@@ -71,7 +71,7 @@
 FdtState,
 )

-class LupvBoard(AbstractBoard):
+class LupvBoard(AbstractBoard, KernelDiskWorkload):
 """
 A board capable of full system simulation for RISC-V.
 This board uses a set of LupIO education-friendly devices.
@@ -274,56 +274,7 @@
 self.mem_ranges = [AddrRange(start=0x8000, size=mem_size)]
 memory.set_memory_range(self.mem_ranges)

-def set_workload(
-self, bootloader: str, disk_image: str, command: Optional[str] =  
None

-):
-"""Setup the full system files
-See https://github.com/darchr/lupio-gem5/blob/lupio/README.md
-for running the full system, and downloading the right files to do  
so.

-The command passes in a boot loader and disk image, as well as the
-script to start the simulaiton.
-After the workload is set up, this function will generate the  
device

-tree file and output it to the output directory.
-
-**Limitations**
-* Only supports a Linux kernel
-* Must use the provided bootloader and disk image as denoted in the
-README above.
-"""
-self.workload.object_file = bootloader
-# Set the disk image for the block device to use
-image = CowDiskImage(
-child=RawDiskImage(read_only=True),
-read_only=False
-)
-image.child.image_file = disk_image
-self.lupio_blk.image = image
-
-# Linux boot command flags
-kernel_cmd = [
-"earlycon console=ttyLIO0",
-"root=/dev/lda1",
-"ro"
-]
-self.workload.command_line = " ".join(kernel_cmd)
-
-# Note: This must be called after set_workload because it looks  
for an

-# attribute named "disk" and connects
-self._setup_io_devices()
-self._setup_pma()
-
-# Default DTB address if bbl is built with --with-dts option
-self.workload.dtb_addr = 0x87E0
-
-# We need to wait to generate the device tree until after the disk  
is
-# set up. Now that the disk and workload are set, we can generate  
the

-# device tree file.
-self.generate_device_tree(m5.options.outdir)
-self.workload.dtb_filename = os.path.join(
-m5.options.outdir, "device.dtb"
-)
-
-def generate_device_tree(self, outdir: str) -> None:
+def _generate_device_tree(self, outdir: str) -> None:
 """Creates the dtb and dts files.
 Creates two files in the outdir: 'device.dtb' and 'device.dts'
 :param outdir: Directory to output the files
@@ -566,3 +517,29 @@
 fdt.add_rootnode(root)
 fdt.writeDtsFile(os.path.join(outdir, "device.dts"))
 fdt.writeDtbFile(os.path.join(outdir, "device.dtb"))
+
+@overrides(KernelDiskWorkload)
+def get_default_kernel_args(self) -> List[str]:
+return ["earlycon console=ttyLIO0", "root={root_value}", "ro"]
+
+@overrides(KernelDiskWorkload)
+def get_disk_device(self) -> str:
+return "/dev/lda"
+
+@overrides(KernelDiskWorkload)
+def _add_disk_to_board(self, disk_image: AbstractResource) -> None:
+# Note: This must be called after set_workload because it looks  
for an

+# attribute named "disk" and connects
+self._setup_io_devices()
+self._setup_pma()
+
+# Default DTB address if bbl is built with 

[gem5-dev] Change in gem5/gem5[develop]: stdlib: Update the LupvBoard to use 'requires'

2021-11-22 Thread Bobby R. Bruce (Gerrit) via gem5-dev
Bobby R. Bruce has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53084 )



Change subject: stdlib: Update the LupvBoard to use 'requires'
..

stdlib: Update the LupvBoard to use 'requires'

Usage of this function was previously avoided due to a bug which has
since been fixed:
https://gem5-review.googlesource.com/c/public/gem5/+/53003

Change-Id: Idc76ca26d02dcfbb290cebcca297e50e905d8e6d
---
M src/python/gem5/components/boards/experimental/lupv_board.py
1 file changed, 17 insertions(+), 7 deletions(-)



diff --git a/src/python/gem5/components/boards/experimental/lupv_board.py  
b/src/python/gem5/components/boards/experimental/lupv_board.py

index 423e3d9..76e857e 100644
--- a/src/python/gem5/components/boards/experimental/lupv_board.py
+++ b/src/python/gem5/components/boards/experimental/lupv_board.py
@@ -33,7 +33,7 @@
 from ...memory.abstract_memory_system import AbstractMemorySystem
 from ...cachehierarchies.abstract_cache_hierarchy import  
AbstractCacheHierarchy

 from isas import ISA
-from runtime import get_runtime_isa
+from utils.requires import requires

 import m5
 from m5.objects import (
@@ -89,15 +89,12 @@
 cache_hierarchy: AbstractCacheHierarchy,
 ) -> None:

-super().__init__(clk_freq, processor, memory, cache_hierarchy)
-if get_runtime_isa() != ISA.RISCV:
-raise EnvironmentError(
-"RiscvBoard will only work with the RISC-V ISA. Please"
-" recompile gem5 with ISA=RISCV."
-)
+requires(isa_required=ISA.RISCV)
 if cache_hierarchy.is_ruby():
 raise EnvironmentError("RiscvBoard is not compatible with  
Ruby")


+super().__init__(clk_freq, processor, memory, cache_hierarchy)
+
 @overrides(AbstractBoard)
 def _setup_board(self) -> None:


--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53084
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Idc76ca26d02dcfbb290cebcca297e50e905d8e6d
Gerrit-Change-Number: 53084
Gerrit-PatchSet: 1
Gerrit-Owner: Bobby R. Bruce 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: stdlib: Update the LupvBoard to account for stdlib changes

2021-11-22 Thread Bobby R. Bruce (Gerrit) via gem5-dev
Bobby R. Bruce has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53083 )



Change subject: stdlib: Update the LupvBoard to account for stdlib changes
..

stdlib: Update the LupvBoard to account for stdlib changes

This patch updates the board to account for the following changes:

* https://gem5-review.googlesource.com/c/public/gem5/+/51790
* https://gem5-review.googlesource.com/c/public/gem5/+/52184
* https://gem5-review.googlesource.com/c/public/gem5/+/52183

These changes, broadly speaking, remove the SimpeBoard as a superclass
and instead have all the boards inherit directly from the AbstractBoard.
It also fixes the order of operations (the order in which components are
incorporated and the board it setup).

Change-Id: I829ed515da28163cafbd292a9c141be4d350636e
---
M src/python/gem5/components/boards/experimental/lupv_board.py
1 file changed, 42 insertions(+), 5 deletions(-)



diff --git a/src/python/gem5/components/boards/experimental/lupv_board.py  
b/src/python/gem5/components/boards/experimental/lupv_board.py

index 28aa209..423e3d9 100644
--- a/src/python/gem5/components/boards/experimental/lupv_board.py
+++ b/src/python/gem5/components/boards/experimental/lupv_board.py
@@ -25,10 +25,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import os
-from typing import Optional
+from typing import Optional, List

 from utils.override import overrides
-from ..simple_board import SimpleBoard
 from ..abstract_board import AbstractBoard
 from ...processors.abstract_processor import AbstractProcessor
 from ...memory.abstract_memory_system import AbstractMemorySystem
@@ -72,7 +71,7 @@
 FdtState,
 )

-class LupvBoard(SimpleBoard):
+class LupvBoard(AbstractBoard):
 """
 A board capable of full system simulation for RISC-V.
 This board uses a set of LupIO education-friendly devices.
@@ -89,6 +88,7 @@
 memory: AbstractMemorySystem,
 cache_hierarchy: AbstractCacheHierarchy,
 ) -> None:
+
 super().__init__(clk_freq, processor, memory, cache_hierarchy)
 if get_runtime_isa() != ISA.RISCV:
 raise EnvironmentError(
@@ -97,6 +97,10 @@
 )
 if cache_hierarchy.is_ruby():
 raise EnvironmentError("RiscvBoard is not compatible with  
Ruby")

+
+@overrides(AbstractBoard)
+def _setup_board(self) -> None:
+
 self.workload = RiscvLinux()

 # Initialize all the devices that we want to use on this board
@@ -240,6 +244,19 @@
 )

 @overrides(AbstractBoard)
+def has_dma_ports(self) -> bool:
+return False
+
+@overrides(AbstractBoard)
+def get_dma_ports(self) -> List[Port]:
+raise NotImplementedError(
+"The LupvBoard does not have DMA Ports. "
+"Use `has_dma_ports()` to check this."
+)
+
+@overrides(AbstractBoard)
+
+@overrides(AbstractBoard)
 def has_io_bus(self) -> bool:
 return True

@@ -254,7 +271,7 @@
 return self.iobus.mem_side_ports

 @overrides(AbstractBoard)
-def setup_memory_ranges(self):
+def _setup_memory_ranges(self):
 memory = self.get_memory()
 mem_size = memory.get_size()
 self.mem_ranges = [AddrRange(start=0x8000, size=mem_size)]
@@ -502,7 +519,7 @@
 FdtPropertyWords("interrupt-parent",
 state.phandle(self.lupio_pic)))
 soc_node.append(lupio_rng_node)
-
+@overrides(AbstractBoard)
 #LupioSYS Device
 lupio_sys = self.lupio_sys
 lupio_sys_node = lupio_sys.generateBasicPioDeviceNode(soc_state,

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53083
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I829ed515da28163cafbd292a9c141be4d350636e
Gerrit-Change-Number: 53083
Gerrit-PatchSet: 1
Gerrit-Owner: Bobby R. Bruce 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: gpu-compute,dev-hsa: Update CP and HSAPP for full-system

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53077 )



Change subject: gpu-compute,dev-hsa: Update CP and HSAPP for full-system
..

gpu-compute,dev-hsa: Update CP and HSAPP for full-system

Make the necessary changes to connect Vega pagetable walkers for
full-system mode. Previously the CP and HSA packet processor could only
read AQL packets from system/host memory using proxy port. This allows
for AQL to be read from device memory which is used for non-blit
kernels.

Change-Id: If28eb8be68173da03e15084765e77e92eda178e9
---
M src/gpu-compute/gpu_command_processor.cc
M src/gpu-compute/gpu_command_processor.hh
M src/dev/hsa/hsa_packet_processor.cc
M src/dev/hsa/hsa_packet_processor.hh
M src/gpu-compute/GPU.py
M src/dev/amdgpu/amdgpu_device.cc
M configs/example/gpufs/system/system.py
M src/dev/hsa/HSADevice.py
8 files changed, 75 insertions(+), 9 deletions(-)



diff --git a/configs/example/gpufs/system/system.py  
b/configs/example/gpufs/system/system.py

index 9aeaf3a..86815ff 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -97,11 +97,15 @@

 # This arbitrary address is something in the X86 I/O hole
 hsapp_gpu_map_paddr = 0xe
+pt_walker1 = VegaPagetableWalker()
 gpu_hsapp = HSAPacketProcessor(pioAddr=hsapp_gpu_map_paddr,
-   numHWQueues=args.num_hw_queues)
+   numHWQueues=args.num_hw_queues,
+   walker=pt_walker1)
 dispatcher = GPUDispatcher()
+pt_walker2 = VegaPagetableWalker()
 gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp,
-   dispatcher=dispatcher)
+   dispatcher=dispatcher,
+   walker=pt_walker2)
 shader.dispatcher = dispatcher
 shader.gpu_cmd_proc = gpu_cmd_proc

@@ -138,6 +142,8 @@
 system._dma_ports.append(device_ih)
 system._dma_ports.append(pm4_pkt_proc)
 system._dma_ports.append(gpu_mem_mgr)
+system._dma_ports.append(pt_walker1)
+system._dma_ports.append(pt_walker2)
 system._dma_ports.append(pt_walker3)
 system._dma_ports.append(pt_walker4)

diff --git a/src/dev/amdgpu/amdgpu_device.cc  
b/src/dev/amdgpu/amdgpu_device.cc

index 35f860d..f559827 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -90,6 +90,8 @@
 sdma1->setId(1);
 deviceIH->setGPUDevice(this);
 pm4PktProc->setGPUDevice(this);
+cp->hsaPacketProc().setGPUDevice(this);
+cp->setGPUDevice(this);
 }

 void
diff --git a/src/dev/hsa/HSADevice.py b/src/dev/hsa/HSADevice.py
index f67d9a8..a94458c 100644
--- a/src/dev/hsa/HSADevice.py
+++ b/src/dev/hsa/HSADevice.py
@@ -33,6 +33,7 @@
 from m5.params import *
 from m5.proxy import *
 from m5.objects.Device import DmaDevice
+from m5.objects.VegaGPUTLB import VegaPagetableWalker

 class HSAPacketProcessor(DmaDevice):
 type = 'HSAPacketProcessor'
@@ -50,3 +51,5 @@
 # See: https://github.com/RadeonOpenCompute/atmi/tree/master/examples/
 #  runtime/kps
 pktProcessDelay = Param.Tick(440, "Packet processing delay")
+walker = Param.VegaPagetableWalker(VegaPagetableWalker(),
+"Page table walker")
diff --git a/src/dev/hsa/hsa_packet_processor.cc  
b/src/dev/hsa/hsa_packet_processor.cc

index b0421d2..43cf4e5 100644
--- a/src/dev/hsa/hsa_packet_processor.cc
+++ b/src/dev/hsa/hsa_packet_processor.cc
@@ -41,6 +41,7 @@
 #include "base/logging.hh"
 #include "base/trace.hh"
 #include "debug/HSAPacketProcessor.hh"
+#include "dev/amdgpu/amdgpu_device.hh"
 #include "dev/dma_device.hh"
 #include "dev/hsa/hsa_packet.hh"
 #include "dev/hsa/hw_scheduler.hh"
@@ -48,6 +49,7 @@
 #include "gpu-compute/gpu_command_processor.hh"
 #include "mem/packet_access.hh"
 #include "mem/page_table.hh"
+#include "sim/full_system.hh"
 #include "sim/process.hh"
 #include "sim/proxy_ptr.hh"
 #include "sim/system.hh"
@@ -73,7 +75,8 @@
 HSAPP_EVENT_DESCRIPTION_GENERATOR(QueueProcessEvent)

 HSAPacketProcessor::HSAPacketProcessor(const Params )
-: DmaVirtDevice(p), numHWQueues(p.numHWQueues), pioAddr(p.pioAddr),
+: DmaVirtDevice(p), walker(p.walker),
+  numHWQueues(p.numHWQueues), pioAddr(p.pioAddr),
   pioSize(PAGE_SIZE), pioDelay(10), pktProcessDelay(p.pktProcessDelay)
 {
 DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
@@ -92,6 +95,15 @@
 }

 void
+HSAPacketProcessor::setGPUDevice(AMDGPUDevice *gpu_device)
+{
+gpuDevice = gpu_device;
+
+assert(walker);
+walker->setDevRequestor(gpuDevice->vramRequestorId());
+}
+
+void
 HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id, int  
doorbellSize)

 {
 hwSchdlr->unregisterQueue(queue_id, doorbellSize);
@@ -166,12 +178,20 @@
 TranslationGenPtr
 HSAPacketProcessor::translate(Addr vaddr, Addr size)
 {
-

[gem5-dev] Change in gem5/gem5[develop]: configs: Add disjoint VIPER configuration

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53072 )



Change subject: configs: Add disjoint VIPER configuration
..

configs: Add disjoint VIPER configuration

The disjoint VIPER configuration creates completely disconnected CPU and
GPU Ruby networks which can communicate only via the PCI bus. Either
garnet or simple network can be used. This copies most of the Ruby setup
from Ruby.py's create_system since creating disjoint networks is not
possible using Ruby.py.

Change-Id: Ibc23aa592f56554d088667d8e309ecdeb306da68
---
A configs/example/gpufs/DisjointNetwork.py
A configs/example/gpufs/Disjoint_VIPER.py
M configs/example/gpufs/runfs.py
M configs/example/gpufs/system/system.py
4 files changed, 328 insertions(+), 3 deletions(-)



diff --git a/configs/example/gpufs/DisjointNetwork.py  
b/configs/example/gpufs/DisjointNetwork.py

new file mode 100644
index 000..a655e8f
--- /dev/null
+++ b/configs/example/gpufs/DisjointNetwork.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2021 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are  
met:

+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright  
notice,

+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS  
IS"

+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR  
PURPOSE

+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF  
THE

+# POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import print_function
+
+from m5.objects import *
+from m5.util import fatal
+
+from network import Network
+
+class DisjointSimple(SimpleNetwork):
+
+def __init__(self, ruby_system):
+super(DisjointSimple, self).__init__()
+
+self.netifs = []
+self.routers = []
+self.int_links = []
+self.ext_links = []
+self.ruby_system = ruby_system
+
+def connectCPU(self, opts, controllers):
+
+# Setup parameters for makeTopology call for CPU network
+exec("import topologies.%s as Topo" % opts.cpu_topology)
+_topo = eval("Topo.%s(controllers)" % opts.cpu_topology)
+_topo.makeTopology(opts, self, SimpleIntLink,
+   SimpleExtLink, Switch)
+
+self.initSimple(opts, self.int_links, self.ext_links)
+
+def connectGPU(self, opts, controllers):
+
+# Setup parameters for makeTopology call for GPU network
+exec("import topologies.%s as Topo" % opts.gpu_topology)
+_topo = eval("Topo.%s(controllers)" % opts.gpu_topology)
+_topo.makeTopology(opts, self, SimpleIntLink,
+   SimpleExtLink, Switch)
+
+self.initSimple(opts, self.int_links, self.ext_links)
+
+
+def initSimple(self, opts, int_links, ext_links):
+
+# Attach links to network
+self.int_links = int_links
+self.ext_links = ext_links
+
+self.setup_buffers()
+
+class DisjointGarnet(GarnetNetwork):
+
+def __init__(self, ruby_system):
+super(DisjointGarnet, self).__init__()
+
+self.netifs = []
+self.ruby_system = ruby_system
+
+def connectCPU(self, opts, controllers):
+
+# Setup parameters for makeTopology call for CPU network
+exec("import topologies.%s as Topo" % opts.cpu_topology)
+_topo = eval("Topo.%s(controllers)" % opts.cpu_topology)
+_topo.makeTopology(opts, self, GarnetIntLink,
+   GarnetExtLink, GarnetRouter)
+
+Network.init_network(opts, self, GarnetNetworkInterface)
+
+def connectGPU(self, opts, controllers):
+
+# Setup parameters for makeTopology call
+exec("import topologies.%s as Topo" % opts.gpu_topology)
+_topo = 

[gem5-dev] Change in gem5/gem5[develop]: gpu-compute: Add page_size.hh to CU/Shader

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53075 )



Change subject: gpu-compute: Add page_size.hh to CU/Shader
..

gpu-compute: Add page_size.hh to CU/Shader

This fixes VEGA_X86 build.

Change-Id: Id96d70eceb77be41d3c1e3a333cd961ada0d8419
---
M src/gpu-compute/compute_unit.cc
M src/gpu-compute/shader.cc
2 files changed, 13 insertions(+), 0 deletions(-)



diff --git a/src/gpu-compute/compute_unit.cc  
b/src/gpu-compute/compute_unit.cc

index feef552..5c37086 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -35,6 +35,7 @@

 #include 

+#include "arch/x86/page_size.hh"
 #include "base/output.hh"
 #include "debug/GPUDisp.hh"
 #include "debug/GPUExec.hh"
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index ad18d01..cb1df66 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -35,6 +35,7 @@

 #include 

+#include "arch/x86/page_size.hh"
 #include "base/chunk_generator.hh"
 #include "debug/GPUAgentDisp.hh"
 #include "debug/GPUDisp.hh"

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53075
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Id96d70eceb77be41d3c1e3a333cd961ada0d8419
Gerrit-Change-Number: 53075
Gerrit-PatchSet: 1
Gerrit-Owner: Alex Dutu 
Gerrit-CC: Matthew Poremba 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: configs: Add construct for GPU dirs

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53073 )



Change subject: configs: Add construct for GPU dirs
..

configs: Add construct for GPU dirs

Change-Id: I436f09d63a2ef63f1e139ffdeb29939587ef60b2
---
M configs/ruby/GPU_VIPER.py
M configs/example/gpufs/runfs.py
2 files changed, 101 insertions(+), 0 deletions(-)



diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
index c8da95f..448531f 100644
--- a/configs/example/gpufs/runfs.py
+++ b/configs/example/gpufs/runfs.py
@@ -79,6 +79,12 @@
 parser.add_argument("--gpu-topology", type=str, default="Crossbar",
 help="Network topology to use for GPU side. "
 "Check configs/topologies for complete set")
+parser.add_argument("--dgpu-mem-size", action="store", type="string",
+  default="16GB", help="Specify the dGPU physical  
memory "

+  "size")
+parser.add_argument("--dgpu-num-dirs", type="int", default = 1,  
help="Set "

+  "the number of dGPU directories (memory controllers")
+

 def runGpuFSSystem(args):
 '''
diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py
index b8a4271..4d40402 100644
--- a/configs/ruby/GPU_VIPER.py
+++ b/configs/ruby/GPU_VIPER.py
@@ -36,6 +36,8 @@
 from m5.util import addToPath
 from .Ruby import create_topology
 from .Ruby import send_evicts
+from common import ObjectList
+from common import MemConfig
 from common import FileSystemConfig

 addToPath('../')
@@ -462,6 +464,90 @@

 return dir_cntrl_nodes

+def construct_gpudirs(options, system, ruby_system, network):
+
+dir_cntrl_nodes = []
+mem_ctrls = []
+
+xor_low_bit = 0
+
+# For an odd number of CPUs, still create the right number of  
controllers

+TCC_bits = int(math.log(options.num_tccs, 2))
+
+dir_bits = int(math.log(options.dgpu_num_dirs, 2))
+block_size_bits = int(math.log(options.cacheline_size, 2))
+numa_bit = block_size_bits + dir_bits - 1
+
+gpu_mem_range = AddrRange(0, size = opts.dgpu_mem_size)
+for i in range(options.dgpu_num_dirs):
+addr_range = m5.objects.AddrRange(gpu_mem_range.start,
+  size = gpu_mem_range.size(),
+  intlvHighBit = numa_bit,
+  intlvBits = dir_bits,
+  intlvMatch = i,
+  xorHighBit = xor_low_bit)
+
+# TODO: What are these parameters doing?
+dir_cntrl = DirCntrl(noTCCdir = True, TCC_select_num_bits =  
TCC_bits)

+dir_cntrl.create(options, [addr_range], ruby_system, system)
+dir_cntrl.number_of_TBEs = options.num_tbes
+# TODO: What's this?
+dir_cntrl.useL3OnWT = options.use_L3_on_WT
+# the number_of_TBEs is inclusive of TBEs below
+
+# Connect the Directory controller to the ruby network
+dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
+dir_cntrl.requestFromCores.in_port = network.out_port
+
+dir_cntrl.responseFromCores = MessageBuffer()
+dir_cntrl.responseFromCores.in_port = network.out_port
+
+dir_cntrl.unblockFromCores = MessageBuffer()
+dir_cntrl.unblockFromCores.in_port = network.out_port
+
+dir_cntrl.probeToCore = MessageBuffer()
+dir_cntrl.probeToCore.out_port = network.in_port
+
+dir_cntrl.responseToCore = MessageBuffer()
+dir_cntrl.responseToCore.out_port = network.in_port
+
+dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
+dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
+dir_cntrl.requestToMemory = MessageBuffer()
+dir_cntrl.responseFromMemory = MessageBuffer()
+
+dir_cntrl.requestFromDMA = MessageBuffer(ordered=True)
+dir_cntrl.requestFromDMA.in_port = network.out_port
+
+dir_cntrl.responseToDMA = MessageBuffer()
+dir_cntrl.responseToDMA.out_port = network.in_port
+
+dir_cntrl.requestToMemory = MessageBuffer()
+dir_cntrl.responseFromMemory = MessageBuffer()
+
+# Create memory controllers too
+mem_type = ObjectList.mem_list.get(options.mem_type)
+dram_intf = MemConfig.create_mem_intf(mem_type, gpu_mem_range, i,
+int(math.log(options.dgpu_num_dirs, 2)),  
options.cacheline_size,

+xor_low_bit)
+if issubclass(mem_type, DRAMInterface):
+mem_ctrl = m5.objects.MemCtrl(dram = dram_intf)
+else:
+mem_ctrl = dram_intf
+
+mem_ctrl.port = dir_cntrl.memory_out_port
+mem_ctrl.dram.enable_dram_powerdown = False
+dir_cntrl.addr_ranges = dram_intf.range
+
+# Append
+exec("system.ruby.gpu_dir_cntrl%d = dir_cntrl" % i)
+

[gem5-dev] Change in gem5/gem5[develop]: dev-hsa: Update HSA queue tracking for FS mode

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53076 )



Change subject: dev-hsa: Update HSA queue tracking for FS mode
..

dev-hsa: Update HSA queue tracking for FS mode

In FS mode offsets for HSA queues are determined by the driver and
cannot be linearly assigned as they are in SE mode. Add plumbing to pass
the offset of a queue to the HSA packet processor and then to HW
scheduler.

A mapping to/from queue ID <-> doorbell offset are also needed to be
able to unmap queues. ROCm 4.2 is fairly aggressive about context
switching queues, which results in queues being constantly mapped and
unmapped.

Another result of remapping queues is the read index is not preserved in
gem5. The PM4 packet processor will write the currenty read index value
to the MQD before the queue is unmapped. The MQD is written back to
memory on unmap and re-read on mapping to obtain the previous value.
Some helper functions are added to be able to restore the read index
from a non-zero value.

Change-Id: I0153ff53765daccc1de23ea3f4b69fd2fa5a275f
---
M src/dev/hsa/hsa_packet_processor.cc
M src/dev/hsa/hsa_packet_processor.hh
M src/dev/hsa/hw_scheduler.cc
M src/dev/hsa/hw_scheduler.hh
4 files changed, 71 insertions(+), 14 deletions(-)



diff --git a/src/dev/hsa/hsa_packet_processor.cc  
b/src/dev/hsa/hsa_packet_processor.cc

index 44c0e87..b0421d2 100644
--- a/src/dev/hsa/hsa_packet_processor.cc
+++ b/src/dev/hsa/hsa_packet_processor.cc
@@ -102,14 +102,15 @@
uint64_t basePointer,
uint64_t queue_id,
uint32_t size, int doorbellSize,
-   GfxVersion gfxVersion)
+   GfxVersion gfxVersion,
+   Addr offset, uint64_t rd_idx)
 {
 DPRINTF(HSAPacketProcessor,
  "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
  (void *)basePointer, queue_id, size);
 hwSchdlr->registerNewQueue(hostReadIndexPointer,
basePointer, queue_id, size, doorbellSize,
-   gfxVersion);
+   gfxVersion, offset, rd_idx);
 }

 AddrRangeList
@@ -584,6 +585,20 @@
 std::fill(_aqlComplete.begin(), _aqlComplete.end(), false);
 }

+void
+AQLRingBuffer::setRdIdx(uint64_t value)
+{
+_rdIdx = value;
+
+// Mark entries below the previous doorbell value as complete. This  
will

+// cause the next call to freeEntry on the queue to increment the read
+// index to the next value which will be written to the doorbell.
+for (int i = 0; i <= value; ++i) {
+_aqlComplete[i] = true;
+DPRINTF(HSAPacketProcessor, "Marking _aqlComplete[%d] true\n", i);
+}
+}
+
 bool
 AQLRingBuffer::freeEntry(void *pkt)
 {
diff --git a/src/dev/hsa/hsa_packet_processor.hh  
b/src/dev/hsa/hsa_packet_processor.hh

index 144fe42..a838069 100644
--- a/src/dev/hsa/hsa_packet_processor.hh
+++ b/src/dev/hsa/hsa_packet_processor.hh
@@ -234,6 +234,7 @@
  void incWrIdx(uint64_t value) { _wrIdx += value; }
  void incDispIdx(uint64_t value) { _dispIdx += value; }
  uint64_t compltnPending() { return (_dispIdx - _rdIdx); }
+ void setRdIdx(uint64_t value);
 };

 struct QCntxt
@@ -353,11 +354,13 @@
 uint64_t basePointer,
 uint64_t queue_id,
 uint32_t size, int doorbellSize,
-GfxVersion gfxVersion);
+GfxVersion gfxVersion,
+Addr offset = 0, uint64_t rd_idx = 0);
 void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize);
 void setDevice(GPUCommandProcessor * dev);
 void updateReadIndex(int, uint32_t);
 void getCommandsFromHost(int pid, uint32_t rl_idx);
+HWScheduler *hwScheduler() { return hwSchdlr; }

 // PIO interface
 virtual Tick read(Packet*) override;
diff --git a/src/dev/hsa/hw_scheduler.cc b/src/dev/hsa/hw_scheduler.cc
index f42dede..341c9eb 100644
--- a/src/dev/hsa/hw_scheduler.cc
+++ b/src/dev/hsa/hw_scheduler.cc
@@ -88,19 +88,22 @@
   uint64_t basePointer,
   uint64_t queue_id,
   uint32_t size, int doorbellSize,
-  GfxVersion gfxVersion)
+  GfxVersion gfxVersion,
+  Addr offset, uint64_t rd_idx)
 {
 assert(queue_id < MAX_ACTIVE_QUEUES);
 // Map queue ID to doorbell.
 // We are only using offset to pio base address as doorbell
 // We use the same mapping function used by hsa runtime to do this  
mapping

-Addr db_offset = queue_id * doorbellSize;
-if (dbMap.find(db_offset) != dbMap.end()) {
+if (!offset)
+

[gem5-dev] Change in gem5/gem5[develop]: dev-amdgpu: Setup VRAM memories in device

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53074 )



Change subject: dev-amdgpu: Setup VRAM memories in device
..

dev-amdgpu: Setup VRAM memories in device

Change-Id: Ic519429f13c4ad1d42997f361cbfe0c6e9aba29a
---
M src/dev/amdgpu/amdgpu_device.cc
1 file changed, 20 insertions(+), 0 deletions(-)



diff --git a/src/dev/amdgpu/amdgpu_device.cc  
b/src/dev/amdgpu/amdgpu_device.cc

index db4456a..35f860d 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -42,6 +42,7 @@
 #include "dev/amdgpu/sdma_engine.hh"
 #include "dev/hsa/hw_scheduler.hh"
 #include "gpu-compute/gpu_command_processor.hh"
+#include "mem/abstract_mem.hh"
 #include "mem/packet.hh"
 #include "mem/packet_access.hh"
 #include "params/AMDGPUDevice.hh"
@@ -63,6 +64,16 @@
 romBin.read((char *)rom.data(), ROM_SIZE);
 romBin.close();

+// System pointer needs to be explicitly set for device memory since
+// DRAMCtrl uses it to get (1) cache line size and (2) the mem mode.
+// Note this means the cache line size is system wide.
+for (auto& m : p.memories) {
+m->system(p.system);
+
+// Add to system's device memory map.
+p.system->addDeviceMemory(gpuMemMgr->getRequestorID(), m);
+}
+
 if (config.expansionROM) {
 romRange = RangeSize(config.expansionROM, ROM_SIZE);
 } else {

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53074
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ic519429f13c4ad1d42997f361cbfe0c6e9aba29a
Gerrit-Change-Number: 53074
Gerrit-PatchSet: 1
Gerrit-Owner: Alex Dutu 
Gerrit-CC: Matthew Poremba 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: configs: Set CPU vendor for GPUFS config

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53070 )



Change subject: configs: Set CPU vendor for GPUFS config
..

configs: Set CPU vendor for GPUFS config

A valid CPU vendor string (i.e., not "M5 Simulator") needs to be passed
to CPUID in order for Linux to create the sysfs files needed for ROCm's
Thunk interface to initialize properly. If these are no created
hipDeviceProperties and other basic GPU code APIs will error out.

Change-Id: I6e3f459162e4673860a8f0a88473e38d5d7be237
---
M configs/example/gpufs/system/system.py
1 file changed, 18 insertions(+), 0 deletions(-)



diff --git a/configs/example/gpufs/system/system.py  
b/configs/example/gpufs/system/system.py

index a1596e4..7aee3b3 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -159,6 +159,10 @@

 system.ruby._cpu_ports[i].connectCpuPorts(cpu)

+for i in range(len(system.cpu)):
+for j in range(len(system.cpu[i].isa)):
+system.cpu[i].isa[j].vendor_string = "AuthenticAMD"
+
 # The shader core will be whatever is after the CPU cores are  
accounted for

 shader_idx = args.num_cpus
 system.cpu.append(shader)

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53070
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I6e3f459162e4673860a8f0a88473e38d5d7be237
Gerrit-Change-Number: 53070
Gerrit-PatchSet: 1
Gerrit-Owner: Alex Dutu 
Gerrit-CC: Matthew Poremba 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: configs: Allow for second disk in GPUFS

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53071 )



Change subject: configs: Allow for second disk in GPUFS
..

configs: Allow for second disk in GPUFS

Connect the --second-disk option in GPUFS. Typically this is used as a
benchmarks disk image. If the disk is unmounted at the time of
checkpoint, a new disk image can be mounted after restoring the
checkpoint for a simple way to add new benchmarks without recreating a
checkpoint.

Change-Id: I57b31bdf8ec628006d774feacff3fde6f533cd4b
---
M configs/example/gpufs/system/system.py
1 file changed, 19 insertions(+), 1 deletion(-)



diff --git a/configs/example/gpufs/system/system.py  
b/configs/example/gpufs/system/system.py

index 7aee3b3..f10892e 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -56,7 +56,10 @@

 # Use the common FSConfig to setup a Linux X86 System
 (TestCPUClass, test_mem_mode, FutureClass) =  
Simulation.setCPUClass(args)

-bm = SysConfig(disks=[args.disk_image], mem=args.mem_size)
+disks = [args.disk_image]
+if args.second_disk is not None:
+disks.extend([args.second_disk])
+bm = SysConfig(disks=disks, mem=args.mem_size)
 system = makeLinuxX86System(test_mem_mode, args.num_cpus, bm, True,
   cmdline=cmdline)
 system.workload.object_file = binary(args.kernel)

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53071
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I57b31bdf8ec628006d774feacff3fde6f533cd4b
Gerrit-Change-Number: 53071
Gerrit-PatchSet: 1
Gerrit-Owner: Alex Dutu 
Gerrit-CC: Matthew Poremba 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: dev-amdgpu: Remove all copied linux headers

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53067 )



Change subject: dev-amdgpu: Remove all copied linux headers
..

dev-amdgpu: Remove all copied linux headers

And create new headers with only the defines needed. The headers point
to the ROCm 4.2 source so we know where they came from. This is easier
than trying to get linux source through a scan and checking in to
public.

Change-Id: Ia7117999606009627d8c429dd443f13899b037d8
---
M src/dev/amdgpu/sdma_engine.cc
M src/dev/amdgpu/sdma_commands.hh
M src/dev/amdgpu/sdma_mmio.hh
D src/dev/amdgpu/vega10/soc15_ih_clientid.h
M src/dev/amdgpu/interrupt_handler.hh
M src/dev/amdgpu/amdgpu_vm.cc
M src/dev/amdgpu/amdgpu_vm.hh
7 files changed, 51 insertions(+), 97 deletions(-)



diff --git a/src/dev/amdgpu/amdgpu_vm.cc b/src/dev/amdgpu/amdgpu_vm.cc
index b245aaf..40ac3f4 100644
--- a/src/dev/amdgpu/amdgpu_vm.cc
+++ b/src/dev/amdgpu/amdgpu_vm.cc
@@ -253,7 +253,7 @@
 range.size = std::min(range.size, next - range.vaddr);
 range.paddr = range.vaddr - vm->getAGPBot() + vm->getAGPBase();

-printf("AMDGPUVM: AGP translation %#lx -> %#lx\n",
+DPRINTF(AMDGPUDevice, "AMDGPUVM: AGP translation %#lx -> %#lx\n",
 range.vaddr, range.paddr);
 }

@@ -289,7 +289,7 @@
 range.paddr = (bits(pte, 47, 12) << 12) | lower_bits;
 }

-printf("AMDGPUVM: GART translation %#lx -> %#lx\n",
+DPRINTF(AMDGPUDevice, "AMDGPUVM: GART translation %#lx -> %#lx\n",
 range.vaddr, range.paddr);
 }

@@ -305,7 +305,7 @@
 range.size = std::min(range.size, next - range.vaddr);
 range.paddr = range.vaddr - vm->getMMHUBBase();

-printf("AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
+DPRINTF(AMDGPUDevice, "AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
 range.vaddr, range.paddr);
 }

@@ -315,7 +315,8 @@
 // Get base address of the page table for this vmid
 Addr base = vm->getPageTableBase(vmid);
 Addr start = vm->getPageTableStart(vmid);
-printf("User tl base %#lx start %#lx walker %p\n", base, start,  
walker);

+DPRINTF(AMDGPUDevice, "User tl base %#lx start %#lx walker %p\n",
+base, start, walker);

 bool dummy;
 unsigned logBytes;
diff --git a/src/dev/amdgpu/amdgpu_vm.hh b/src/dev/amdgpu/amdgpu_vm.hh
index b57b9ec..6c6c749 100644
--- a/src/dev/amdgpu/amdgpu_vm.hh
+++ b/src/dev/amdgpu/amdgpu_vm.hh
@@ -47,10 +47,10 @@
  * MMIO offsets for graphics register bus manager (GRBM). These values were
  * taken from linux header files. The header files can be found here:
  *
- *  
https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/include/

- *  asic_reg/gc/gc_9_0_offset.h
- *  
https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/include/

- *  asic_reg/mmhub/mmhub_1_0_offset.h
+ * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.2.0/
+ *  drivers/gpu/drm/amd/include/ asic_reg/gc/gc_9_0_offset.h
+ * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.2.0/
+ *  drivers/gpu/drm/amd/include/ asic_reg/mmhub/mmhub_1_0_offset.h
  */

 #define mmVM_INVALIDATE_ENG17_ACK  
0x08c6
diff --git a/src/dev/amdgpu/interrupt_handler.hh  
b/src/dev/amdgpu/interrupt_handler.hh

index 15a2c18..6324e0e 100644
--- a/src/dev/amdgpu/interrupt_handler.hh
+++ b/src/dev/amdgpu/interrupt_handler.hh
@@ -50,7 +50,25 @@
 namespace gem5
 {

-/*
+/**
+ * Defines from driver code. Taken from
+ * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.2.0/
+ * drivers/gpu/drm/amd/include/soc15_ih_clientid.h
+ */
+enum soc15_ih_clientid
+{
+SOC15_IH_CLIENTID_RLC   = 0x07,
+SOC15_IH_CLIENTID_SDMA0 = 0x08,
+SOC15_IH_CLIENTID_SDMA1 = 0x09
+};
+
+enum ihSourceId
+{
+TRAP_ID = 224
+};
+
+/**
+ * MSI-style interrupts. Send a "cookie" response to clear interrupts.
  * From [1] we know the size of the struct is 8 dwords. Then we can look  
at the register shift offsets in [2] to guess the rest.

  * Or we can also look at [3].
  *
@@ -80,6 +98,9 @@
 uint32_t source_data_dw4;
 } AMDGPUInterruptCookie;

+/**
+ * Struct to contain all interrupt handler related registers.
+ */
 typedef struct
 {
 uint32_t IH_Cntl;
diff --git a/src/dev/amdgpu/sdma_commands.hh  
b/src/dev/amdgpu/sdma_commands.hh

index ddebf5a..bbd7b92 100644
--- a/src/dev/amdgpu/sdma_commands.hh
+++ b/src/dev/amdgpu/sdma_commands.hh
@@ -37,8 +37,8 @@
 /**
  * Commands for the SDMA engine. The header files can be found here:
  *
- *  
https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/amdgpu/

- *  vega10_sdma_pkt_open.h
+ * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.2.0/
+ * drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h
  */
 #define SDMA_OP_NOP  0
 #define SDMA_OP_COPY  1
diff --git 

[gem5-dev] Change in gem5/gem5[develop]: dev-amdgpu: Add VM class for apertures, TranslationGens

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53066 )



Change subject: dev-amdgpu: Add VM class for apertures, TranslationGens
..

dev-amdgpu: Add VM class for apertures, TranslationGens

Create a VM class to reduce clutter in the amdgpu_device.* files. This
new file is in charge of reading/writting MMIOs related to VM contexts
and apertures. It also provides ranges checks for various apertures and
breaks out the MMIO interface so that there are not overloaded macro
definitions in the device MMIO methods.

The new translation generator classes for the various apertures are also
added to this class.

Change-Id: Ic224c1aa485685685b1136a46eed50bcf99d2350
---
M src/dev/amdgpu/sdma_engine.cc
M src/dev/amdgpu/amdgpu_defines.hh
M src/dev/amdgpu/SConscript
A src/dev/amdgpu/amdgpu_vm.cc
A src/dev/amdgpu/amdgpu_vm.hh
M src/dev/amdgpu/amdgpu_device.cc
M src/dev/amdgpu/amdgpu_device.hh
M src/arch/amdgpu/vega/tlb.cc
8 files changed, 754 insertions(+), 182 deletions(-)



diff --git a/src/arch/amdgpu/vega/tlb.cc b/src/arch/amdgpu/vega/tlb.cc
index ca7e6a0..8cbf8d9 100644
--- a/src/arch/amdgpu/vega/tlb.cc
+++ b/src/arch/amdgpu/vega/tlb.cc
@@ -621,7 +621,7 @@
 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
 virtPageAddr);

-Addr base = gpuDevice->getPageTableBase(1);
+Addr base = gpuDevice->getVM().getPageTableBase(1);
 Addr vaddr = pkt->req->getVaddr();
 walker->setDevRequestor(gpuDevice->vramRequestorId());

@@ -822,7 +822,7 @@
 PageTableEntry pte;

 // Initialize walker state for VMID
-Addr base = tlb->gpuDevice->getPageTableBase(1);
+Addr base = tlb->gpuDevice->getVM().getPageTableBase(1);
  
tlb->walker->setDevRequestor(tlb->gpuDevice->vramRequestorId());


 // Do page table walk
diff --git a/src/dev/amdgpu/SConscript b/src/dev/amdgpu/SConscript
index 830a991..ebf7977 100644
--- a/src/dev/amdgpu/SConscript
+++ b/src/dev/amdgpu/SConscript
@@ -38,6 +38,7 @@
 SimObject('AMDGPU.py', tags='x86 isa')

 Source('amdgpu_device.cc', tags='x86 isa')
+Source('amdgpu_vm.cc', tags='x86 isa')
 Source('interrupt_handler.cc', tags='x86 isa')
 Source('memory_manager.cc', tags='x86 isa')
 Source('mmio_reader.cc', tags='x86 isa')
diff --git a/src/dev/amdgpu/amdgpu_defines.hh  
b/src/dev/amdgpu/amdgpu_defines.hh

index 778b64c..9e62644 100644
--- a/src/dev/amdgpu/amdgpu_defines.hh
+++ b/src/dev/amdgpu/amdgpu_defines.hh
@@ -34,6 +34,8 @@
 #ifndef __DEV_AMDGPU_AMDGPU_DEFINES_HH__
 #define __DEV_AMDGPU_AMDGPU_DEFINES_HH__

+#include "base/types.hh"
+
 namespace gem5
 {

@@ -49,6 +51,9 @@
 RLC
 };

+// AMD GPUs support 16 different virtual address spaces
+static constexpr int AMDGPU_VM_COUNT = 16;
+
 /* Names of BARs used by the device. */
 constexpr int FRAMEBUFFER_BAR = 0;
 constexpr int DOORBELL_BAR = 2;
diff --git a/src/dev/amdgpu/amdgpu_device.cc  
b/src/dev/amdgpu/amdgpu_device.cc

index c41a505..d3b0577 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -36,6 +36,7 @@
 #include 

 #include "debug/AMDGPUDevice.hh"
+#include "dev/amdgpu/amdgpu_vm.hh"
 #include "dev/amdgpu/interrupt_handler.hh"
 #include "dev/amdgpu/sdma_engine.hh"
 #include "mem/packet.hh"
@@ -74,9 +75,6 @@
 sdma1->setGPUDevice(this);
 sdma1->setId(1);
 deviceIH->setGPUDevice(this);
-
-// Zero out context0
-memset(, 0, sizeof(SysVMContext));
 }

 void
@@ -190,8 +188,24 @@
 void
 AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
 {
+Addr aperture = gpuvm.getMmioAperture(offset);
+Addr aperture_offset = offset - aperture;
+
+// By default read from MMIO trace. Overwrite the packet for a select
+// few more dynamic MMIOs.
 DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
 mmioReader.readFromTrace(pkt, MMIO_BAR, offset);
+
+switch (aperture) {
+  case GRBM_BASE:
+gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
+break;
+  case MMHUB_BASE:
+gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
+break;
+  default:
+break;
+}
 }

 void
@@ -199,14 +213,15 @@
 {
 DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx\n", offset);

-Addr aperture = getFrameAperture(offset);
+Addr aperture = gpuvm.getFrameAperture(offset);
 Addr aperture_offset = offset - aperture;

 // Record the value
 frame_regs[aperture_offset] = pkt->getLE();
-if (aperture == gartBase) {
+if (aperture == gpuvm.gartBase()) {
 DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n",  
aperture_offset,

 bits(frame_regs[aperture_offset], 48, 12));
+gpuvm.gartTable[aperture_offset] = pkt->getLE();
 }
 }

@@ -242,7 +257,7 @@
 void
 AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
 {
-Addr aperture = getMmioAperture(offset);
+

[gem5-dev] Change in gem5/gem5[develop]: dev-amdgpu: Hotfix variable initialization

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53063 )



Change subject: dev-amdgpu: Hotfix variable initialization
..

dev-amdgpu: Hotfix variable initialization

These non-initialized variables were causing the trace reader not to
record all lines.

Change-Id: I88764493d8124c072bc90ff9c08aa26421467f7b
---
M src/dev/amdgpu/mmio_reader.hh
1 file changed, 15 insertions(+), 3 deletions(-)



diff --git a/src/dev/amdgpu/mmio_reader.hh b/src/dev/amdgpu/mmio_reader.hh
index 6c88f0b..a057183 100644
--- a/src/dev/amdgpu/mmio_reader.hh
+++ b/src/dev/amdgpu/mmio_reader.hh
@@ -99,9 +99,9 @@
 trace_BAR_t trace_BARs[6];

 /* Indexes used to print driver loading progress. */
-uint64_t trace_index;
-uint64_t trace_final_index;
-uint64_t trace_cur_index;
+uint64_t trace_index = 0;
+uint64_t trace_final_index = 0;
+uint64_t trace_cur_index = 0;

 /* An entry in the MMIO trace. */
 struct MmioTrace

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53063
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I88764493d8124c072bc90ff9c08aa26421467f7b
Gerrit-Change-Number: 53063
Gerrit-PatchSet: 1
Gerrit-Owner: Alex Dutu 
Gerrit-CC: Matthew Poremba 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: configs: Connect SDMA, IH, and memory manager in GPUFS

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53065 )



Change subject: configs: Connect SDMA, IH, and memory manager in GPUFS
..

configs: Connect SDMA, IH, and memory manager in GPUFS

Add the devices that have been added to the staging branch so far to the
config file so that they may be tested.

Change-Id: I44110c9a24559936102a246c9658abb84a8ce07e
---
M src/dev/amdgpu/amdgpu_device.cc
M configs/example/gpufs/system/system.py
2 files changed, 69 insertions(+), 0 deletions(-)



diff --git a/configs/example/gpufs/system/system.py  
b/configs/example/gpufs/system/system.py

index 4ec3618..7cb6a0a 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -100,14 +100,38 @@
 shader.dispatcher = dispatcher
 shader.gpu_cmd_proc = gpu_cmd_proc

+# GPU Interrupt Handler
+device_ih = AMDGPUInterruptHandler()
+system.pc.south_bridge.gpu.device_ih = device_ih
+
+# Setup the SDMA engines
+pt_walker3 = VegaPagetableWalker()
+pt_walker4 = VegaPagetableWalker()
+
+sdma0 = SDMAEngine(walker=pt_walker3)
+sdma1 = SDMAEngine(walker=pt_walker4)
+
+system.pc.south_bridge.gpu.sdma0 = sdma0
+system.pc.south_bridge.gpu.sdma1 = sdma1
+
+# GPU data path
+gpu_mem_mgr = AMDGPUMemoryManager()
+system.pc.south_bridge.gpu.memory_manager = gpu_mem_mgr
+
 # GPU, HSAPP, and GPUCommandProc are DMA devices
 system._dma_ports.append(gpu_hsapp)
 system._dma_ports.append(gpu_cmd_proc)
 system._dma_ports.append(system.pc.south_bridge.gpu)
+system._dma_ports.append(sdma0)
+system._dma_ports.append(sdma1)
+system._dma_ports.append(device_ih)

 gpu_hsapp.pio = system.iobus.mem_side_ports
 gpu_cmd_proc.pio = system.iobus.mem_side_ports
 system.pc.south_bridge.gpu.pio = system.iobus.mem_side_ports
+sdma0.pio = system.iobus.mem_side_ports
+sdma1.pio = system.iobus.mem_side_ports
+device_ih.pio = system.iobus.mem_side_ports

 # Create Ruby system using Ruby.py for now
 Ruby.create_system(args, True, system, system.iobus,
diff --git a/src/dev/amdgpu/amdgpu_device.cc  
b/src/dev/amdgpu/amdgpu_device.cc

index 77bd915..c41a505 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -214,6 +214,29 @@
 AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
 {
 DPRINTF(AMDGPUDevice, "Wrote doorbell %#lx\n", offset);
+
+if (doorbells.find(offset) != doorbells.end()) {
+QueueType q_type = doorbells[offset];
+DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n",
+  offset, q_type);
+switch (q_type) {
+  case SDMAGfx: {
+SDMAEngine *sdmaEng = getSDMAEngine(offset);
+sdmaEng->processGfx(pkt->getLE());
+  } break;
+  case SDMAPage: {
+SDMAEngine *sdmaEng = getSDMAEngine(offset);
+sdmaEng->processPage(pkt->getLE());
+  } break;
+  case InterruptHandler:
+deviceIH->updateRptr(pkt->getLE());
+break;
+  default:
+panic("Write to unkown queue type!");
+}
+} else {
+warn("Unknown doorbell offset: %lx\n", offset);
+}
 }

 void
@@ -225,6 +248,16 @@
 DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);

 switch (aperture) {
+  case SDMA0_BASE:
+sdma0->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
+break;
+  case SDMA1_BASE:
+sdma1->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
+break;
+  case GRBM_BASE:
+gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
+pm4PktProc->writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
+break;
   case IH_BASE:
 deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
 break;

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53065
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I44110c9a24559936102a246c9658abb84a8ce07e
Gerrit-Change-Number: 53065
Gerrit-PatchSet: 1
Gerrit-Owner: Alex Dutu 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: dev-amdgpu: Add checkpoint support to AMDGPUDevice

2021-11-22 Thread Alex Dutu (Gerrit) via gem5-dev
Alex Dutu has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53069 )



Change subject: dev-amdgpu: Add checkpoint support to AMDGPUDevice
..

dev-amdgpu: Add checkpoint support to AMDGPUDevice

These will be needed for the second checkpoint.

Change-Id: I85ee2cbc0df130868d19376c4d98dbe4d424698e
---
M src/dev/amdgpu/amdgpu_device.cc
1 file changed, 113 insertions(+), 0 deletions(-)



diff --git a/src/dev/amdgpu/amdgpu_device.cc  
b/src/dev/amdgpu/amdgpu_device.cc

index 2afcd05..db4456a 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -431,6 +431,55 @@
 {
 // Serialize the PciDevice base class
 PciDevice::serialize(cp);
+
+uint64_t regs_size = regs.size();
+uint64_t doorbells_size = doorbells.size();
+uint64_t sdma_engs_size = sdmaEngs.size();
+
+SERIALIZE_SCALAR(regs_size);
+SERIALIZE_SCALAR(doorbells_size);
+SERIALIZE_SCALAR(sdma_engs_size);
+
+// Make a c-style array of the regs to serialize
+uint32_t reg_addrs[regs_size];
+uint64_t reg_values[regs_size];
+uint32_t doorbells_offset[doorbells_size];
+QueueType doorbells_queues[doorbells_size];
+uint32_t sdma_engs_offset[sdma_engs_size];
+int sdma_engs[sdma_engs_size];
+
+int idx = 0;
+for (auto & it : regs) {
+reg_addrs[idx] = it.first;
+reg_values[idx] = it.second;
+++idx;
+}
+
+idx = 0;
+for (auto & it : doorbells) {
+doorbells_offset[idx] = it.first;
+doorbells_queues[idx] = it.second;
+++idx;
+}
+
+idx = 0;
+for (auto & it : sdmaEngs) {
+sdma_engs_offset[idx] = it.first;
+sdma_engs[idx] = it.second == sdma0 ? 0 : 1;
+++idx;
+}
+
+SERIALIZE_ARRAY(reg_addrs, sizeof(reg_addrs)/sizeof(reg_addrs[0]));
+SERIALIZE_ARRAY(reg_values, sizeof(reg_values)/sizeof(reg_values[0]));
+SERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
+sizeof(doorbells_offset[0]));
+SERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
+sizeof(doorbells_queues[0]));
+SERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/
+sizeof(sdma_engs_offset[0]));
+SERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));
+
+// Serialize the device memory
 }

 void
@@ -438,6 +487,59 @@
 {
 // Unserialize the PciDevice base class
 PciDevice::unserialize(cp);
+
+uint64_t regs_size = 0;
+uint64_t doorbells_size = 0;
+uint64_t sdma_engs_size = 0;
+
+UNSERIALIZE_SCALAR(regs_size);
+UNSERIALIZE_SCALAR(doorbells_size);
+UNSERIALIZE_SCALAR(sdma_engs_size);
+
+if (regs_size > 0) {
+uint32_t reg_addrs[regs_size];
+uint64_t reg_values[regs_size];
+
+UNSERIALIZE_ARRAY(reg_addrs,  
sizeof(reg_addrs)/sizeof(reg_addrs[0]));

+UNSERIALIZE_ARRAY(reg_values,
+  sizeof(reg_values)/sizeof(reg_values[0]));
+
+for (int idx = 0; idx < regs_size; ++idx) {
+regs.insert(std::make_pair(reg_addrs[idx], reg_values[idx]));
+}
+}
+
+if (doorbells_size > 0) {
+uint32_t doorbells_offset[doorbells_size];
+QueueType doorbells_queues[doorbells_size];
+
+UNSERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
+sizeof(doorbells_offset[0]));
+UNSERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
+sizeof(doorbells_queues[0]));
+
+for (int idx = 0; idx < doorbells_size; ++idx) {
+regs.insert(std::make_pair(doorbells_offset[idx],
+  doorbells_queues[idx]));
+doorbells[doorbells_offset[idx]] = doorbells_queues[idx];
+}
+}
+
+if (sdma_engs_size > 0) {
+uint32_t sdma_engs_offset[sdma_engs_size];
+int sdma_engs[sdma_engs_size];
+
+UNSERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/
+sizeof(sdma_engs_offset[0]));
+UNSERIALIZE_ARRAY(sdma_engs,  
sizeof(sdma_engs)/sizeof(sdma_engs[0]));

+
+for (int idx = 0; idx < sdma_engs_size; ++idx) {
+SDMAEngine *sdma = sdma_engs[idx] == 0 ? sdma0 : sdma1;
+sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
+}
+}
+
+// Unserialize the device memory
 }

 uint16_t

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53069
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I85ee2cbc0df130868d19376c4d98dbe4d424698e
Gerrit-Change-Number: 53069
Gerrit-PatchSet: 1
Gerrit-Owner: Alex Dutu 
Gerrit-CC: Matthew Poremba 
Gerrit-MessageType: newchange
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org

[gem5-dev] Change in gem5/gem5[develop]: stdlib: Fix enum comparison in 'requires'

2021-11-22 Thread Bobby R. Bruce (Gerrit) via gem5-dev
Bobby R. Bruce has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/53003 )


Change subject: stdlib: Fix enum comparison in 'requires'
..

stdlib: Fix enum comparison in 'requires'

Change-Id: Iff8e6edcbb553e8180c21c583d79b689f9a40bce
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/53003
Reviewed-by: Jason Lowe-Power 
Maintainer: Jason Lowe-Power 
Tested-by: kokoro 
---
M src/python/gem5/utils/requires.py
1 file changed, 16 insertions(+), 2 deletions(-)

Approvals:
  Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/python/gem5/utils/requires.py  
b/src/python/gem5/utils/requires.py

index 76a8b41..7789254 100644
--- a/src/python/gem5/utils/requires.py
+++ b/src/python/gem5/utils/requires.py
@@ -72,7 +72,7 @@
 runtime_coherence_protocol = get_runtime_coherence_protocol()
 kvm_available = os.access("/dev/kvm", mode=os.R_OK | os.W_OK)

-if isa_required != None and isa_required != runtime_isa:
+if isa_required != None and isa_required.value != runtime_isa.value:
 raise Exception(
 _get_exception_str(
 msg="The current ISA is '{}'. Required: '{}'".format(
@@ -83,7 +83,8 @@

 if (
 coherence_protocol_required != None
-and coherence_protocol_required != runtime_coherence_protocol
+and coherence_protocol_required.value
+!= runtime_coherence_protocol.value
 ):
 raise Exception(
 _get_exception_str(

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/53003
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Iff8e6edcbb553e8180c21c583d79b689f9a40bce
Gerrit-Change-Number: 53003
Gerrit-PatchSet: 5
Gerrit-Owner: Bobby R. Bruce 
Gerrit-Reviewer: Bobby R. Bruce 
Gerrit-Reviewer: Jason Lowe-Power 
Gerrit-Reviewer: kokoro 
Gerrit-MessageType: merged
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: tests: Removing unittests for .perf and .prof

2021-11-22 Thread Bobby R. Bruce (Gerrit) via gem5-dev
Bobby R. Bruce has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52983 )


Change subject: tests: Removing unittests for .perf and .prof
..

tests: Removing unittests for .perf and .prof

.perf and .prof no longer exist as build targets. We now only test .opt
and .debug as of this commit.

Change-Id: Ieaf93edd33fe64330d50c6d446bfd21f9f07a895
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52983
Reviewed-by: Bobby R. Bruce 
Maintainer: Bobby R. Bruce 
Tested-by: kokoro 
---
M tests/nightly.sh
1 file changed, 16 insertions(+), 2 deletions(-)

Approvals:
  Bobby R. Bruce: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/tests/nightly.sh b/tests/nightly.sh
index 41db369..9f69eb1 100755
--- a/tests/nightly.sh
+++ b/tests/nightly.sh
@@ -74,8 +74,6 @@
 # Run the unit tests.
 unit_test opt
 unit_test debug
-unit_test perf
-unit_test prof

 # Run the gem5 long tests.
 docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/52983
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ieaf93edd33fe64330d50c6d446bfd21f9f07a895
Gerrit-Change-Number: 52983
Gerrit-PatchSet: 3
Gerrit-Owner: Bobby R. Bruce 
Gerrit-Reviewer: Bobby R. Bruce 
Gerrit-Reviewer: Jason Lowe-Power 
Gerrit-Reviewer: kokoro 
Gerrit-MessageType: merged
___
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: configs: Replace master/slave terminology from ruby scripts

2021-11-22 Thread Giacomo Travaglini (Gerrit) via gem5-dev
Giacomo Travaglini has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52865 )


 (

3 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.
 )Change subject: configs: Replace master/slave terminology from ruby  
scripts

..

configs: Replace master/slave terminology from ruby scripts

Signed-off-by: Giacomo Travaglini 
Change-Id: Iabc82a19e8d6c7cf619874dc2926276c349eba7c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52865
Tested-by: kokoro 
Reviewed-by: Jason Lowe-Power 
Maintainer: Jason Lowe-Power 
---
M configs/ruby/MI_example.py
M configs/ruby/AMD_Base_Constructor.py
M configs/ruby/MOESI_hammer.py
M configs/ruby/CHI_config.py
M configs/ruby/MESI_Two_Level.py
M configs/ruby/MESI_Three_Level_HTM.py
M configs/ruby/Ruby.py
M configs/ruby/MOESI_CMP_directory.py
M configs/ruby/MOESI_CMP_token.py
M configs/ruby/MESI_Three_Level.py
M configs/ruby/MOESI_AMD_Base.py
11 files changed, 168 insertions(+), 155 deletions(-)

Approvals:
  Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/configs/ruby/AMD_Base_Constructor.py  
b/configs/ruby/AMD_Base_Constructor.py

index cd4733b..1abc6d7 100644
--- a/configs/ruby/AMD_Base_Constructor.py
+++ b/configs/ruby/AMD_Base_Constructor.py
@@ -116,11 +116,11 @@
 cp_cntrl.create(options, ruby_system, system)

 # Connect the CP controllers to the ruby network
-cp_cntrl.requestFromCore = ruby_system.network.slave
-cp_cntrl.responseFromCore = ruby_system.network.slave
-cp_cntrl.unblockFromCore = ruby_system.network.slave
-cp_cntrl.probeToCore = ruby_system.network.master
-cp_cntrl.responseToCore = ruby_system.network.master
+cp_cntrl.requestFromCore = ruby_system.network.in_port
+cp_cntrl.responseFromCore = ruby_system.network.in_port
+cp_cntrl.unblockFromCore = ruby_system.network.in_port
+cp_cntrl.probeToCore = ruby_system.network.out_port
+cp_cntrl.responseToCore = ruby_system.network.out_port

 exec("system.cp_cntrl%d = cp_cntrl" % i)
 #
diff --git a/configs/ruby/CHI_config.py b/configs/ruby/CHI_config.py
index 6507970..2d39659 100644
--- a/configs/ruby/CHI_config.py
+++ b/configs/ruby/CHI_config.py
@@ -351,7 +351,6 @@
 self.__dict__['support_inst_reqs'] = True
 # Compatibility with certain scripts that wire up ports
 # without connectCpuPorts
-self.__dict__['slave'] = dseq.in_ports
 self.__dict__['in_ports'] = dseq.in_ports

 def connectCpuPorts(self, cpu):
diff --git a/configs/ruby/MESI_Three_Level.py  
b/configs/ruby/MESI_Three_Level.py

index 4088ecd..c184e57 100644
--- a/configs/ruby/MESI_Three_Level.py
+++ b/configs/ruby/MESI_Three_Level.py
@@ -173,16 +173,16 @@

 # Connect the L1 controllers and the network
 l1_cntrl.requestToL2 = MessageBuffer()
-l1_cntrl.requestToL2.master = ruby_system.network.slave
+l1_cntrl.requestToL2.out_port = ruby_system.network.in_port
 l1_cntrl.responseToL2 = MessageBuffer()
-l1_cntrl.responseToL2.master = ruby_system.network.slave
+l1_cntrl.responseToL2.out_port = ruby_system.network.in_port
 l1_cntrl.unblockToL2 = MessageBuffer()
-l1_cntrl.unblockToL2.master = ruby_system.network.slave
+l1_cntrl.unblockToL2.out_port = ruby_system.network.in_port

 l1_cntrl.requestFromL2 = MessageBuffer()
-l1_cntrl.requestFromL2.slave = ruby_system.network.master
+l1_cntrl.requestFromL2.in_port = ruby_system.network.out_port
 l1_cntrl.responseFromL2 = MessageBuffer()
-l1_cntrl.responseFromL2.slave = ruby_system.network.master
+l1_cntrl.responseFromL2.in_port = ruby_system.network.out_port


 for j in range(num_l2caches_per_cluster):
@@ -203,18 +203,18 @@

 # Connect the L2 controllers and the network
 l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
-l2_cntrl.DirRequestFromL2Cache.master =  
ruby_system.network.slave
+l2_cntrl.DirRequestFromL2Cache.out_port =  
ruby_system.network.in_port

 l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
-l2_cntrl.L1RequestFromL2Cache.master =  
ruby_system.network.slave
+l2_cntrl.L1RequestFromL2Cache.out_port =  
ruby_system.network.in_port

 l2_cntrl.responseFromL2Cache = MessageBuffer()
-l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave
+l2_cntrl.responseFromL2Cache.out_port =  
ruby_system.network.in_port


 l2_cntrl.unblockToL2Cache = MessageBuffer()
-l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
+l2_cntrl.unblockToL2Cache.in_port =  

[gem5-dev] Change in gem5/gem5[develop]: configs: Replace master/slave terminology from configs scripts

2021-11-22 Thread Giacomo Travaglini (Gerrit) via gem5-dev
Giacomo Travaglini has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52866 )


Change subject: configs: Replace master/slave terminology from configs  
scripts

..

configs: Replace master/slave terminology from configs scripts

Signed-off-by: Giacomo Travaglini 
Change-Id: I6a1a06abeca1621efb378c400c5b24b33a7a3727
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52866
Tested-by: kokoro 
Reviewed-by: Bobby R. Bruce 
Reviewed-by: Jason Lowe-Power 
Maintainer: Bobby R. Bruce 
Maintainer: Jason Lowe-Power 
---
M configs/example/ruby_direct_test.py
M configs/common/GPUTLBConfig.py
M configs/common/MemConfig.py
M configs/example/ruby_random_test.py
M configs/common/FSConfig.py
M configs/common/HMC.py
M configs/nvm/sweep.py
M configs/common/CacheConfig.py
M configs/example/se.py
M configs/example/garnet_synth_traffic.py
M configs/example/memcheck.py
M configs/splash2/cluster.py
M configs/dram/low_power_sweep.py
M configs/nvm/sweep_hybrid.py
M configs/example/memtest.py
M configs/example/ruby_mem_test.py
M configs/example/etrace_replay.py
M configs/splash2/run.py
M configs/example/read_config.py
M configs/example/fs.py
M configs/example/hmctest.py
M configs/dram/lat_mem_rd.py
M configs/dram/sweep.py
M configs/example/hmc_hello.py
24 files changed, 184 insertions(+), 154 deletions(-)

Approvals:
  Jason Lowe-Power: Looks good to me, but someone else must approve; Looks  
good to me, approved

  Bobby R. Bruce: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/configs/common/CacheConfig.py b/configs/common/CacheConfig.py
index b270a83..4979f7d 100644
--- a/configs/common/CacheConfig.py
+++ b/configs/common/CacheConfig.py
@@ -122,8 +122,8 @@
**_get_cache_opts('l2', options))

 system.tol2bus = L2XBar(clk_domain = system.cpu_clk_domain)
-system.l2.cpu_side = system.tol2bus.master
-system.l2.mem_side = system.membus.slave
+system.l2.cpu_side = system.tol2bus.mem_side_ports
+system.l2.mem_side = system.membus.cpu_side_ports

 if options.memchecker:
 system.memchecker = MemChecker()
diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py
index efb0af6..8b8fb4e 100644
--- a/configs/common/FSConfig.py
+++ b/configs/common/FSConfig.py
@@ -120,11 +120,11 @@
 self.t1000.attachIO(self.iobus)
 self.mem_ranges = [AddrRange(Addr('1MB'), size = '64MB'),
AddrRange(Addr('2GB'), size ='256MB')]
-self.bridge.master = self.iobus.slave
-self.bridge.slave = self.membus.master
+self.bridge.mem_side_port = self.iobus.cpu_side_ports
+self.bridge.cpu_side_port = self.membus.mem_side_ports
 self.disk0 = CowMmDisk()
 self.disk0.childImage(mdesc.disks()[0])
-self.disk0.pio = self.iobus.master
+self.disk0.pio = self.iobus.mem_side_ports

 # The puart0 and hvuart are placed on the IO bus, so create ranges
 # for them. The remaining IO range is rather fragmented, so poke
@@ -160,12 +160,12 @@
 self.partition_desc = SimpleMemory(image_file=binary('1up-md.bin'),
 range=AddrRange(0x1f1200, size='8kB'))

-self.rom.port = self.membus.master
-self.nvram.port = self.membus.master
-self.hypervisor_desc.port = self.membus.master
-self.partition_desc.port = self.membus.master
+self.rom.port = self.membus.mem_side_ports
+self.nvram.port = self.membus.mem_side_ports
+self.hypervisor_desc.port = self.membus.mem_side_ports
+self.partition_desc.port = self.membus.mem_side_ports

-self.system_port = self.membus.slave
+self.system_port = self.membus.cpu_side_ports

 self.workload = workload

@@ -189,10 +189,10 @@
 self.iobus = IOXBar()
 if not ruby:
 self.bridge = Bridge(delay='50ns')
-self.bridge.master = self.iobus.slave
+self.bridge.mem_side_port = self.iobus.cpu_side_ports
 self.membus = MemBus()
 self.membus.badaddr_responder.warn_access = "warn"
-self.bridge.slave = self.membus.master
+self.bridge.cpu_side_port = self.membus.mem_side_ports

 self.mem_mode = mem_mode

@@ -299,13 +299,13 @@
 # I/O traffic enters iobus
 self.external_io = ExternalMaster(port_data="external_io",
   port_type=external_memory)
-self.external_io.port = self.iobus.slave
+self.external_io.port = self.iobus.cpu_side_ports

 # Ensure iocache only receives traffic destined for (actual)  
memory.

 self.iocache = ExternalSlave(port_data="iocache",
  port_type=external_memory,
  addr_ranges=self.mem_ranges)
-self.iocache.port = self.iobus.master
+self.iocache.port = self.iobus.mem_side_ports

 # Let system_port get to nvmem and nothing else.
 

[gem5-dev] Change in gem5/gem5[develop]: tests: Replace master/slave terminology from tests scripts

2021-11-22 Thread Giacomo Travaglini (Gerrit) via gem5-dev
Giacomo Travaglini has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52863 )


Change subject: tests: Replace master/slave terminology from tests scripts
..

tests: Replace master/slave terminology from tests scripts

Signed-off-by: Giacomo Travaglini 
Change-Id: Id7aafc082c7e4cfc977e807141e63a3feb5a6348
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52863
Tested-by: kokoro 
Maintainer: Bobby R. Bruce 
Reviewed-by: Bobby R. Bruce 
---
M tests/configs/simple-atomic-mp-ruby.py
M tests/gem5/m5threads_test_atomic/caches.py
M tests/gem5/memory/simple-run.py
M tests/configs/o3-timing-ruby.py
M tests/configs/t1000-simple-atomic.py
M tests/gem5/configs/base_config.py
M tests/configs/memtest-ruby.py
M tests/gem5/memory/memtest-run.py
M tests/gem5/cpu_tests/run.py
M tests/configs/gpu-randomtest-ruby.py
M tests/configs/o3-timing-mp-ruby.py
M tests/gem5/m5threads_test_atomic/atomic_system.py
M tests/configs/gpu-ruby.py
M tests/configs/memtest-filter.py
M tests/configs/pc-simple-timing-ruby.py
M tests/configs/memtest.py
M tests/configs/rubytest-ruby.py
17 files changed, 74 insertions(+), 60 deletions(-)

Approvals:
  Bobby R. Bruce: Looks good to me, approved; Looks good to me, approved
  Giacomo Travaglini: Looks good to me, approved
  kokoro: Regressions pass




diff --git a/tests/configs/gpu-randomtest-ruby.py  
b/tests/configs/gpu-randomtest-ruby.py

index 0d1171c..03d31fe 100644
--- a/tests/configs/gpu-randomtest-ruby.py
+++ b/tests/configs/gpu-randomtest-ruby.py
@@ -125,11 +125,11 @@
 # Tie the ruby tester ports to the ruby cpu read and write ports
 #
 if ruby_port.support_data_reqs and ruby_port.support_inst_reqs:
-tester.cpuInstDataPort = ruby_port.slave
+tester.cpuInstDataPort = ruby_port.in_ports
 elif ruby_port.support_data_reqs:
-tester.cpuDataPort = ruby_port.slave
+tester.cpuDataPort = ruby_port.in_ports
 elif ruby_port.support_inst_reqs:
-tester.cpuInstPort = ruby_port.slave
+tester.cpuInstPort = ruby_port.in_ports

 # Do not automatically retry stalled Ruby requests
 ruby_port.no_retry_on_stall = True
diff --git a/tests/configs/gpu-ruby.py b/tests/configs/gpu-ruby.py
index 1864156..2f57779 100644
--- a/tests/configs/gpu-ruby.py
+++ b/tests/configs/gpu-ruby.py
@@ -305,7 +305,7 @@
 system.ruby._cpu_ports[0].in_ports,
 system.ruby._cpu_ports[0].in_ports,
 system.ruby._cpu_ports[0].interrupt_out_port)
-system.ruby._cpu_ports[0].mem_master_port = system.piobus.slave
+system.ruby._cpu_ports[0].mem_request_port = system.piobus.cpu_side_ports

 # attach CU ports to Ruby
 # Because of the peculiarities of the CP core, you may have 1 CPU but 2
@@ -338,8 +338,8 @@
 assert(args.num_cp == 0)

 # connect dispatcher to the system.piobus
-dispatcher.pio = system.piobus.master
-dispatcher.dma = system.piobus.slave
+dispatcher.pio = system.piobus.mem_side_ports
+dispatcher.dma = system.piobus.cpu_side_ports

 # Connect the CPU and GPU via GPU Dispatcher  
###

 # CPU rings the GPU doorbell to notify a pending task
diff --git a/tests/configs/memtest-filter.py  
b/tests/configs/memtest-filter.py

index cce7397..042b3cd 100644
--- a/tests/configs/memtest-filter.py
+++ b/tests/configs/memtest-filter.py
@@ -50,10 +50,10 @@
 system.toL2Bus = L2XBar(clk_domain = system.cpu_clk_domain,
 snoop_filter = SnoopFilter())
 system.l2c = L2Cache(clk_domain = system.cpu_clk_domain, size='64kB',  
assoc=8)

-system.l2c.cpu_side = system.toL2Bus.master
+system.l2c.cpu_side = system.toL2Bus.mem_side_ports

 # connect l2c to membus
-system.l2c.mem_side = system.membus.slave
+system.l2c.mem_side = system.membus.cpu_side_ports

 # add L1 caches
 for cpu in cpus:
@@ -61,12 +61,12 @@
 cpu.clk_domain = system.cpu_clk_domain
 cpu.l1c = L1Cache(size = '32kB', assoc = 4)
 cpu.l1c.cpu_side = cpu.port
-cpu.l1c.mem_side = system.toL2Bus.slave
+cpu.l1c.mem_side = system.toL2Bus.cpu_side_ports

-system.system_port = system.membus.slave
+system.system_port = system.membus.cpu_side_ports

 # connect memory to membus
-system.physmem.port = system.membus.master
+system.physmem.port = system.membus.mem_side_ports


 # ---
diff --git a/tests/configs/memtest-ruby.py b/tests/configs/memtest-ruby.py
index f8fbd10..d6e1cf4 100644
--- a/tests/configs/memtest-ruby.py
+++ b/tests/configs/memtest-ruby.py
@@ -100,7 +100,7 @@
  # Tie the cpu port to the ruby cpu ports and
  # physmem, respectively
  #
- cpus[i].port = ruby_port.slave
+ cpus[i].port = ruby_port.in_ports

  #
  # Since the memtester is incredibly bursty, increase the deadlock
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py
index a957674..01a5a46 100644
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@@ -49,10 +49,10 @@

 system.toL2Bus = 

[gem5-dev] Change in gem5/gem5[develop]: learning_gem5: Replace master/slave terminology from learning_gem5 sc...

2021-11-22 Thread Giacomo Travaglini (Gerrit) via gem5-dev
Giacomo Travaglini has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/52864 )


 (

3 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the  
submitted one.
 )Change subject: learning_gem5: Replace master/slave terminology from  
learning_gem5 scripts

..

learning_gem5: Replace master/slave terminology from learning_gem5 scripts

Signed-off-by: Giacomo Travaglini 
Change-Id: I6c30d18155acb151d732ae7e35e29f6facad78fd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52864
Tested-by: kokoro 
Reviewed-by: Jason Lowe-Power 
Maintainer: Jason Lowe-Power 
---
M configs/learning_gem5/part3/msi_caches.py
M configs/learning_gem5/part3/ruby_caches_MI_example.py
M configs/learning_gem5/part3/test_caches.py
3 files changed, 42 insertions(+), 27 deletions(-)

Approvals:
  Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/configs/learning_gem5/part3/msi_caches.py  
b/configs/learning_gem5/part3/msi_caches.py

index 1614c46..957adf2 100644
--- a/configs/learning_gem5/part3/msi_caches.py
+++ b/configs/learning_gem5/part3/msi_caches.py
@@ -98,7 +98,7 @@
 # Set up a proxy port for the system_port. Used for load binaries  
and

 # other functional-only things.
 self.sys_port_proxy = RubyPortProxy()
-system.system_port = self.sys_port_proxy.slave
+system.system_port = self.sys_port_proxy.in_ports

 # Connect the cpu's cache, interrupt, and TLB ports to Ruby
 for i,cpu in enumerate(cpus):
@@ -155,18 +155,19 @@
 # explicitly connected to anything.
 self.mandatoryQueue = MessageBuffer()

-# All message buffers must be created and connected to the general
-# Ruby network. In this case, "slave/master" don't mean the same  
thing
-# as normal gem5 ports. If a MessageBuffer is a "to" buffer (i.e.,  
out)

-# then you use the "master", otherwise, the slave.
+# All message buffers must be created and connected to the
+# general Ruby network. In this case, "in_port/out_port" don't
+# mean the same thing as normal gem5 ports. If a MessageBuffer
+# is a "to" buffer (i.e., out) then you use the "out_port",
+# otherwise, the in_port.
 self.requestToDir = MessageBuffer(ordered = True)
-self.requestToDir.master = ruby_system.network.slave
+self.requestToDir.out_port = ruby_system.network.in_port
 self.responseToDirOrSibling = MessageBuffer(ordered = True)
-self.responseToDirOrSibling.master = ruby_system.network.slave
+self.responseToDirOrSibling.out_port = ruby_system.network.in_port
 self.forwardFromDir = MessageBuffer(ordered = True)
-self.forwardFromDir.slave = ruby_system.network.master
+self.forwardFromDir.in_port = ruby_system.network.out_port
 self.responseFromDirOrSibling = MessageBuffer(ordered = True)
-self.responseFromDirOrSibling.slave = ruby_system.network.master
+self.responseFromDirOrSibling.in_port =  
ruby_system.network.out_port


 class DirController(Directory_Controller):

@@ -192,14 +193,14 @@

 def connectQueues(self, ruby_system):
 self.requestFromCache = MessageBuffer(ordered = True)
-self.requestFromCache.slave = ruby_system.network.master
+self.requestFromCache.in_port = ruby_system.network.out_port
 self.responseFromCache = MessageBuffer(ordered = True)
-self.responseFromCache.slave = ruby_system.network.master
+self.responseFromCache.in_port = ruby_system.network.out_port

 self.responseToCache = MessageBuffer(ordered = True)
-self.responseToCache.master = ruby_system.network.slave
+self.responseToCache.out_port = ruby_system.network.in_port
 self.forwardToCache = MessageBuffer(ordered = True)
-self.forwardToCache.master = ruby_system.network.slave
+self.forwardToCache.out_port = ruby_system.network.in_port

 # These are other special message buffers. They are used to send
 # requests to memory and responses from memory back to the  
controller.
diff --git a/configs/learning_gem5/part3/ruby_caches_MI_example.py  
b/configs/learning_gem5/part3/ruby_caches_MI_example.py

index 0406829..b67e6b1 100644
--- a/configs/learning_gem5/part3/ruby_caches_MI_example.py
+++ b/configs/learning_gem5/part3/ruby_caches_MI_example.py
@@ -96,7 +96,7 @@
 # Set up a proxy port for the system_port. Used for load binaries  
and

 # other functional-only things.
 self.sys_port_proxy = RubyPortProxy()
-system.system_port = self.sys_port_proxy.slave
+system.system_port = self.sys_port_proxy.in_ports

 # Connect the cpu's cache, interrupt, and TLB ports to Ruby
 for i,cpu in enumerate(cpus):
@@ -149,13