[gem5-dev] changeset in gem5: config: KVM acceleration for apu_se.py

2016-08-22 Thread David Hashe
changeset d726d0cea027 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=d726d0cea027
description:
config: KVM acceleration for apu_se.py

Add support for using KVM to accelerate APU simulations. The intended use
case is to fast-forward through runtime initialization until the first
kernel launch.

diffstat:

 configs/example/apu_se.py |  157 ++---
 configs/ruby/Ruby.py  |3 +
 2 files changed, 120 insertions(+), 40 deletions(-)

diffs (231 lines):

diff -r ceece50bbf08 -r d726d0cea027 configs/example/apu_se.py
--- a/configs/example/apu_se.py Mon Aug 22 11:41:37 2016 -0400
+++ b/configs/example/apu_se.py Mon Aug 22 11:43:44 2016 -0400
@@ -150,6 +150,10 @@
   help="number of physical banks per LDS module")
 parser.add_option("--ldsBankConflictPenalty", type="int", default=1,
   help="number of cycles per LDS bank conflict")
+parser.add_option('--fast-forward-pseudo-op', action='store_true',
+  help = 'fast forward using kvm until the m5_switchcpu'
+  ' pseudo-op is encountered, then switch cpus. subsequent'
+  ' m5_switchcpu pseudo-ops will toggle back and forth')
 
 
 Ruby.define_options(parser)
@@ -280,47 +284,67 @@
 # List of CPUs
 cpu_list = []
 
-# We only support timing mode for shader and memory
+CpuClass, mem_mode = Simulation.getCPUClass(options.cpu_type)
+if CpuClass == AtomicSimpleCPU:
+    fatal("AtomicSimpleCPU is not supported")
+if mem_mode != 'timing':
+    fatal("Only the timing memory mode is supported")
 shader.timing = True
-mem_mode = 'timing'
 
-# create the cpus
-for i in range(options.num_cpus):
-    cpu = None
-    if options.cpu_type == "detailed":
-        cpu = DerivO3CPU(cpu_id=i,
-                         clk_domain = SrcClockDomain(
-                             clock = options.CPUClock,
-                             voltage_domain = VoltageDomain(
-                                 voltage = options.cpu_voltage)))
-    elif options.cpu_type == "timing":
-        cpu = TimingSimpleCPU(cpu_id=i,
-                              clk_domain = SrcClockDomain(
-                                  clock = options.CPUClock,
-                                  voltage_domain = VoltageDomain(
-                                      voltage = options.cpu_voltage)))
+if options.fast_forward and options.fast_forward_pseudo_op:
+    fatal("Cannot fast-forward based both on the number of instructions and"
+          " on pseudo-ops")
+fast_forward = options.fast_forward or options.fast_forward_pseudo_op
+
+if fast_forward:
+    FutureCpuClass, future_mem_mode = CpuClass, mem_mode
+
+    CpuClass = X86KvmCPU
+    mem_mode = 'atomic_noncaching'
+    # Leave shader.timing untouched, because its value only matters at the
+    # start of the simulation and because we require switching cpus
+    # *before* the first kernel launch.
+
+    future_cpu_list = []
+
+    # Initial CPUs to be used during fast-forwarding.
+    for i in xrange(options.num_cpus):
+        cpu = CpuClass(cpu_id = i,
+                       clk_domain = SrcClockDomain(
+                           clock = options.CPUClock,
+                           voltage_domain = VoltageDomain(
+                               voltage = options.cpu_voltage)))
+        cpu_list.append(cpu)
+
+        if options.fast_forward:
+            cpu.max_insts_any_thread = int(options.fast_forward)
+
+if fast_forward:
+    MainCpuClass = FutureCpuClass
+else:
+    MainCpuClass = CpuClass
+
+# CPs to be used throughout the simulation.
+for i in xrange(options.num_cp):
+    cp = MainCpuClass(cpu_id = options.num_cpus + i,
+                      clk_domain = SrcClockDomain(
+                          clock = options.CPUClock,
+                          voltage_domain = VoltageDomain(
+                              voltage = options.cpu_voltage)))
+    cp_list.append(cp)
+
+# Main CPUs (to be used after fast-forwarding if fast-forwarding is specified).
+for i in xrange(options.num_cpus):
+    cpu = MainCpuClass(cpu_id = i,
+                       clk_domain = SrcClockDomain(
+                           clock = options.CPUClock,
+                           voltage_domain = VoltageDomain(
+                               voltage = options.cpu_voltage)))
+    if fast_forward:
+        cpu.switched_out = True
+        future_cpu_list.append(cpu)
     else:
-        fatal("Atomic CPU not supported/tested")
-    cpu_list.append(cpu)
-
-# create the command processors
-for i in xrange(options.num_cp):
-    cp = None
-    if options.cpu_type == "detailed":
-        cp = DerivO3CPU(cpu_id = options.num_cpus + i,
-                        clk_domain = SrcClockDomain(
-                            clock = options.CPUClock,
-                            voltage_domain = VoltageDomain(
-                                voltage = options.cpu_voltage)))
-    elif options.cpu_type == 'timing':
-

[gem5-dev] changeset in gem5: cpu, mem, sim: Change how KVM maps memory

2016-08-22 Thread David Hashe
changeset 29606f000389 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=29606f000389
description:
cpu, mem, sim: Change how KVM maps memory

Only map memories into the KVM guest address space that are
marked as usable by KVM. Create BackingStoreEntry class
containing flags for is_conf_reported, in_addr_map, and
kvm_map.
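
As a hedged illustration of how a configuration might use the new flag (the memory instance, its range, and the `system.shadow_mem` attribute below are made up; in_addr_map is an existing AbstractMemory parameter and kvm_map is the one added here):

```python
# Sketch: keep a shadow/reference memory out of the KVM guest mapping.
shadow_mem = SimpleMemory(range=AddrRange(0x100000000, size='512MB'))
shadow_mem.in_addr_map = False   # reference copy, not in the global address map
shadow_mem.kvm_map = False       # new flag from this change: skip KVM mapping
system.shadow_mem = shadow_mem   # attach to an existing System object
```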

diffstat:

 src/cpu/kvm/vm.cc |  11 ++--
 src/mem/AbstractMemory.py |   6 +
 src/mem/abstract_mem.cc   |   2 +-
 src/mem/abstract_mem.hh   |  15 +++-
 src/mem/physical.cc   |  51 +++
 src/mem/physical.hh   |  54 --
 6 files changed, 120 insertions(+), 19 deletions(-)

diffs (289 lines):

diff -r 7e7157941d70 -r 29606f000389 src/cpu/kvm/vm.cc
--- a/src/cpu/kvm/vm.cc Tue Aug 16 10:59:15 2016 +0100
+++ b/src/cpu/kvm/vm.cc Mon Aug 22 11:41:05 2016 -0400
@@ -341,13 +341,18 @@
 void
 KvmVM::delayedStartup()
 {
-    const std::vector<std::pair<AddrRange, uint8_t*> > &memories(
+    const std::vector<BackingStoreEntry> &memories(
         system->getPhysMem().getBackingStore());
 
     DPRINTF(Kvm, "Mapping %i memory region(s)\n", memories.size());
     for (int slot(0); slot < memories.size(); ++slot) {
-        const AddrRange &range(memories[slot].first);
-        void *pmem(memories[slot].second);
+        if (!memories[slot].kvmMap) {
+            DPRINTF(Kvm, "Skipping region marked as not usable by KVM\n");
+            continue;
+        }
+
+        const AddrRange &range(memories[slot].range);
+        void *pmem(memories[slot].pmem);
 
         if (pmem) {
             DPRINTF(Kvm, "Mapping region: 0x%p -> 0x%llx [size: 0x%llx]\n",
diff -r 7e7157941d70 -r 29606f000389 src/mem/AbstractMemory.py
--- a/src/mem/AbstractMemory.py Tue Aug 16 10:59:15 2016 +0100
+++ b/src/mem/AbstractMemory.py Mon Aug 22 11:41:05 2016 -0400
@@ -57,6 +57,12 @@
     # e.g. by the testers that use shadow memories as a reference
     in_addr_map = Param.Bool(True, "Memory part of the global address map")
 
+    # When KVM acceleration is used, memory is mapped into the guest process
+    # address space and accessed directly. Some memories may need to be
+    # excluded from this mapping if they overlap with other memory ranges or
+    # are not accessible by the CPU.
+    kvm_map = Param.Bool(True, "Should KVM map this memory for the guest")
+
     # Should the bootloader include this memory when passing
     # configuration information about the physical memory layout to
     # the kernel, e.g. using ATAG or ACPI
diff -r 7e7157941d70 -r 29606f000389 src/mem/abstract_mem.cc
--- a/src/mem/abstract_mem.cc   Tue Aug 16 10:59:15 2016 +0100
+++ b/src/mem/abstract_mem.cc   Mon Aug 22 11:41:05 2016 -0400
@@ -57,7 +57,7 @@
 AbstractMemory::AbstractMemory(const Params *p) :
     MemObject(p), range(params()->range), pmemAddr(NULL),
     confTableReported(p->conf_table_reported), inAddrMap(p->in_addr_map),
-    _system(NULL)
+    kvmMap(p->kvm_map), _system(NULL)
 {
 }
 
diff -r 7e7157941d70 -r 29606f000389 src/mem/abstract_mem.hh
--- a/src/mem/abstract_mem.hh   Tue Aug 16 10:59:15 2016 +0100
+++ b/src/mem/abstract_mem.hh   Mon Aug 22 11:41:05 2016 -0400
@@ -111,10 +111,13 @@
     uint8_t* pmemAddr;
 
     // Enable specific memories to be reported to the configuration table
-    bool confTableReported;
+    const bool confTableReported;
 
     // Should the memory appear in the global address map
-    bool inAddrMap;
+    const bool inAddrMap;
+
+    // Should KVM map this memory for the guest
+    const bool kvmMap;
 
     std::list<LockedAddr> lockedAddrList;
 
@@ -283,6 +286,14 @@
     bool isInAddrMap() const { return inAddrMap; }
 
     /**
+     * When shadow memories are in use, KVM may want to make one or the other,
+     * but cannot map both into the guest address space.
+     *
+     * @return if this memory should be mapped into the KVM guest address space
+     */
+    bool isKvmMap() const { return kvmMap; }
+
+    /**
      * Perform an untimed memory access and update all the state
      * (e.g. locked addresses) and statistics accordingly. The packet
      * is turned into a response if required.
diff -r 7e7157941d70 -r 29606f000389 src/mem/physical.cc
--- a/src/mem/physical.cc   Tue Aug 16 10:59:15 2016 +0100
+++ b/src/mem/physical.cc   Mon Aug 22 11:41:05 2016 -0400
@@ -111,7 +111,9 @@
             // memories are allowed to overlap in the logic address
             // map
             vector<AbstractMemory*> unmapped_mems{m};
-            createBackingStore(m->getAddrRange(), unmapped_mems);
+            createBackingStore(m->getAddrRange(), unmapped_mems,
+                               m->isConfReported(), m->isInAddrMap(),
+                               m->isKvmMap());
         }
     }
 
 @@ -132,7 +134,19 @@
                 if (!intlv_ranges.empty() &&
                     !intlv_ranges.back().mergesWith(r.first)) {
                     AddrRange merged_range(intlv_ranges);

[gem5-dev] changeset in gem5: tests: Add example of using KVM acceleration ...

2016-08-22 Thread David Hashe
changeset ceece50bbf08 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=ceece50bbf08
description:
tests: Add example of using KVM acceleration with an app

Add #ifdef's to gpu-hello.cpp demonstrating how to annotate an application
for KVM acceleration.

diffstat:

 tests/test-progs/gpu-hello/src/gpu-hello.cpp |  10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diffs (27 lines):

diff -r 29606f000389 -r ceece50bbf08 tests/test-progs/gpu-hello/src/gpu-hello.cpp
--- a/tests/test-progs/gpu-hello/src/gpu-hello.cpp  Mon Aug 22 11:41:05 2016 -0400
+++ b/tests/test-progs/gpu-hello/src/gpu-hello.cpp  Mon Aug 22 11:41:37 2016 -0400
@@ -41,6 +41,12 @@
 #include 
 #include 
 
+#ifdef KVM_SWITCH
+#include "m5op.h"
+
+void *m5_mem = (void*)0xc900;
+#endif
+
 #define SUCCESS 0
 #define FAILURE 1
 
@@ -247,6 +253,10 @@
         return FAILURE;
     }
 
+#ifdef KVM_SWITCH
+    m5_switchcpu();
+#endif
+
     // 2. Launch kernel
     status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
                                     globalThreads, localThreads, 0, NULL,
___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


Re: [gem5-dev] Review Request 3599: mem: Sort memory commands and update DRAMPower

2016-08-22 Thread Jason Lowe-Power


> On Aug. 16, 2016, 3:20 p.m., Jason Lowe-Power wrote:
> > src/mem/dram_ctrl.hh, line 340
> > 
> >
> > Since you want this to be sorted, would it make more sense to use a 
> > sorted list (e.g., priority_queue)?
> 
> Wendy Elsasser wrote:
> Sorting is only required at periodic intervals; the list does not need to 
> be sorted per insertion.  Regardless, I agree that a priority_queue would 
> work as well as the current implementation of a vector.  As the current 
> implementation has been verified, are there any issues sticking with this 
> choice?

No. I won't be picky about it. For the most part it seems that the overall 
complexity is about the same either way, so it's up to you.


- Jason


---
This is an automatically generated e-mail. To reply, visit:
http://reviews.gem5.org/r/3599/#review8640
---


On Aug. 11, 2016, 9:07 a.m., Curtis Dunham wrote:
> 
> ---
> This is an automatically generated e-mail. To reply, visit:
> http://reviews.gem5.org/r/3599/
> ---
> 
> (Updated Aug. 11, 2016, 9:07 a.m.)
> 
> 
> Review request for Default and Matthias Jung.
> 
> 
> Repository: gem5
> 
> 
> Description
> ---
> 
> mem: Sort memory commands and update DRAMPower
> 
> Add a local variable to store commands to be issued.
> These commands are in order within a single bank but will be out
> of order across banks & ranks.
> 
> A new procedure, flushCmdList, sorts commands across banks / ranks,
> and flushes the sorted list, up to curTick() to DRAMPower.
> This is currently called in refresh, once all previous commands are
> guaranteed to have completed.  Could be called in other events like
> the powerEvent as well.
> 
> By only flushing commands up to curTick(), DRAMPower will not get out of sync
> when flushed at a periodic stats dump (done in a subsequent patch).
> 
> Change-Id: I4ac65a52407f64270db1e16a1fb04cfe7f638851
> Reviewed-by: Radhika Jagtap 
> 
> 
> Diffs
> -
> 
>   src/mem/dram_ctrl.hh e9096175eb38ac39f37c91bfdf2a450b9664e222 
>   src/mem/dram_ctrl.cc e9096175eb38ac39f37c91bfdf2a450b9664e222 
> 
> Diff: http://reviews.gem5.org/r/3599/diff/
> 
> 
> Testing
> ---
> 
> 
> Thanks,
> 
> Curtis Dunham
> 
>

___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


Re: [gem5-dev] Review Request 3599: mem: Sort memory commands and update DRAMPower

2016-08-22 Thread Jason Lowe-Power

---
This is an automatically generated e-mail. To reply, visit:
http://reviews.gem5.org/r/3599/#review8674
---

Ship it!


Ship It!

- Jason Lowe-Power


On Aug. 11, 2016, 9:07 a.m., Curtis Dunham wrote:
> 
> ---
> This is an automatically generated e-mail. To reply, visit:
> http://reviews.gem5.org/r/3599/
> ---
> 
> (Updated Aug. 11, 2016, 9:07 a.m.)
> 
> 
> Review request for Default and Matthias Jung.
> 
> 
> Repository: gem5
> 
> 
> Description
> ---
> 
> mem: Sort memory commands and update DRAMPower
> 
> Add a local variable to store commands to be issued.
> These commands are in order within a single bank but will be out
> of order across banks & ranks.
> 
> A new procedure, flushCmdList, sorts commands across banks / ranks,
> and flushes the sorted list, up to curTick() to DRAMPower.
> This is currently called in refresh, once all previous commands are
> guaranteed to have completed.  Could be called in other events like
> the powerEvent as well.
> 
> By only flushing commands up to curTick(), DRAMPower will not get out of sync
> when flushed at a periodic stats dump (done in a subsequent patch).
> 
> Change-Id: I4ac65a52407f64270db1e16a1fb04cfe7f638851
> Reviewed-by: Radhika Jagtap 
> 
> 
> Diffs
> -
> 
>   src/mem/dram_ctrl.hh e9096175eb38ac39f37c91bfdf2a450b9664e222 
>   src/mem/dram_ctrl.cc e9096175eb38ac39f37c91bfdf2a450b9664e222 
> 
> Diff: http://reviews.gem5.org/r/3599/diff/
> 
> 
> Testing
> ---
> 
> 
> Thanks,
> 
> Curtis Dunham
> 
>

___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


Re: [gem5-dev] Review Request 3599: mem: Sort memory commands and update DRAMPower

2016-08-22 Thread Wendy Elsasser


> On Aug. 16, 2016, 3:20 p.m., Jason Lowe-Power wrote:
> > src/mem/dram_ctrl.hh, line 340
> > 
> >
> > Since you want this to be sorted, would it make more sense to use a 
> > sorted list (e.g., priority_queue)?

Sorting is only required at periodic intervals; the list does not need to be 
sorted per insertion.  Regardless, I agree that a priority_queue would work as 
well as the current implementation of a vector.  As the current implementation 
has been verified, are there any issues sticking with this choice?
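
As a rough sketch of the trade-off being weighed here (plain Python rather than the controller's C++, and the helper names are made up): both schemes hand DRAMPower the same time-ordered commands, and differ only in when the ordering cost is paid.

```python
import heapq
from itertools import count

_seq = count()  # tie-breaker so equal-tick entries never compare payloads

# Option A (as in the patch): append unsorted, sort once per flush interval.
cmd_list = []

def record_sorted_later(tick, cmd):
    cmd_list.append((tick, next(_seq), cmd))

def flush_sorted_later(now):
    cmd_list.sort()                       # one O(n log n) sort per flush
    ready = [e for e in cmd_list if e[0] <= now]
    del cmd_list[:len(ready)]             # list is sorted, so this is a prefix
    return [e[2] for e in ready]

# Option B (review suggestion): keep a heap, pay O(log n) on every insert.
cmd_heap = []

def record_heap(tick, cmd):
    heapq.heappush(cmd_heap, (tick, next(_seq), cmd))

def flush_heap(now):
    ready = []
    while cmd_heap and cmd_heap[0][0] <= now:
        ready.append(heapq.heappop(cmd_heap)[2])
    return ready
```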


- Wendy


---
This is an automatically generated e-mail. To reply, visit:
http://reviews.gem5.org/r/3599/#review8640
---


On Aug. 11, 2016, 9:07 a.m., Curtis Dunham wrote:
> 
> ---
> This is an automatically generated e-mail. To reply, visit:
> http://reviews.gem5.org/r/3599/
> ---
> 
> (Updated Aug. 11, 2016, 9:07 a.m.)
> 
> 
> Review request for Default and Matthias Jung.
> 
> 
> Repository: gem5
> 
> 
> Description
> ---
> 
> mem: Sort memory commands and update DRAMPower
> 
> Add a local variable to store commands to be issued.
> These commands are in order within a single bank but will be out
> of order across banks & ranks.
> 
> A new procedure, flushCmdList, sorts commands across banks / ranks,
> and flushes the sorted list, up to curTick() to DRAMPower.
> This is currently called in refresh, once all previous commands are
> guaranteed to have completed.  Could be called in other events like
> the powerEvent as well.
> 
> By only flushing commands up to curTick(), DRAMPower will not get out of sync
> when flushed at a periodic stats dump (done in a subsequent patch).
> 
> Change-Id: I4ac65a52407f64270db1e16a1fb04cfe7f638851
> Reviewed-by: Radhika Jagtap 
> 
> 
> Diffs
> -
> 
>   src/mem/dram_ctrl.hh e9096175eb38ac39f37c91bfdf2a450b9664e222 
>   src/mem/dram_ctrl.cc e9096175eb38ac39f37c91bfdf2a450b9664e222 
> 
> Diff: http://reviews.gem5.org/r/3599/diff/
> 
> 
> Testing
> ---
> 
> 
> Thanks,
> 
> Curtis Dunham
> 
>

___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


Re: [gem5-dev] Review Request 3619: kvm: Support timing accesses for KVM cpu

2016-08-22 Thread Michael LeBeane


> On Aug. 21, 2016, 3:29 a.m., Michael LeBeane wrote:
> > Well, having played around with this solution for KVM MMIO + Ruby a bit it 
> > does work (with the addition of a timing_noncacheable state as Andreas S. 
> > noted), but not very well.  If you put the system in timing mode you get 
> > events like DRAM refresh that make it so you can't stay in KVM very long, 
> > which kinda defeats the purpose. Any non-hacky ideas how to get around this?
> 
> Andreas Sandberg wrote:
> This is an unfortunate side-effect of using KVM. A display processor 
> would cause the same type of issues (you'd get events at least once per 
> refresh, but possibly once per N pixels). There are basically two high-level 
> solutions: 
> 
>1. Don't issue frequent events when running in KVM mode. I have been 
> considering this for the HDLCD. If running in *_noncacheable, we'd just 
> reduce simulation fidelity to get events down to something manageable.
>2. Run KVM in a separate thread similar to when simulating a 
> multi-core system using KVM. This allows you to put devices in one event 
> queue and each of the simulated KVM cores in separate event queues and 
> control when the queues are synchronised.
> 
> In this particular case, I think 2 sounds like a reasonable solution 
> since you presumably want good timing fidelity for the GPU. Synchronisation 
> is going to be "interesting", but the KVM CPU should be able to cope with 
> being in its own thread. Communication should only really happen when 
> handling MMIOs and interrupts, which already support synchronisation. I have 
> something along these lines in my KVM script to map CPUs to threads:
> 
> ```python
> root.sim_quantum=m5.ticks.fromSeconds(options.quantum * 1E-3)
> 
> # Assign independent event queues (threads) to the KVM CPUs,
> # event queue 0 is reserved for simulated devices.
> for idx, cpu in enumerate(system.cpu):
>     # Child objects usually inherit the parent's event
>     # queue. Override that and use queue 0 instead.
>     for obj in cpu.descendants():
>         obj.eventq_index = 0
> 
>     cpu.eventq_index = idx + 1
> ```
> 
> You might want to test the timing changes on their own in a multi-core 
> system in timing_noncacheable mode to make sure that they synchronise 
> correctly. I have a sneaking suspicion that they don't at the moment.

Thanks for the good suggestions!  Yeah, solution 2) seems like the right way to 
go.  I don't exactly need to run KVM and the GPU at the same time (right now I 
switch CPU models), but I can see how 2) would be very useful for those out 
there just studying GPU performance.

Looks like the DRAM refresh event is turned off in atomic mode specifically for 
KVM. Making it do the same in timing mode while KVM is running, for solution 1), 
sounds a bit hacky if not impossible.

Having played with multiple event queues a bit for other projects, I know that 
it sometimes fails to work as intended the first time around :-). I'm a bit too 
busy with other stuff to get sucked up in this right now, but will hopefully 
get some free cycles to explore this later.


- Michael


---
This is an automatically generated e-mail. To reply, visit:
http://reviews.gem5.org/r/3619/#review8668
---


On Aug. 21, 2016, 3:19 a.m., Michael LeBeane wrote:
> 
> ---
> This is an automatically generated e-mail. To reply, visit:
> http://reviews.gem5.org/r/3619/
> ---
> 
> (Updated Aug. 21, 2016, 3:19 a.m.)
> 
> 
> Review request for Default.
> 
> 
> Repository: gem5
> 
> 
> Description
> ---
> 
> Changeset 11561:4595cc3848fc
> ---
> kvm: Support timing accesses for KVM cpu
> This patch enables timing accesses for KVM cpu.  A new state,
> RunningMMIOPending, is added to indicate that there are outstanding timing
> requests generated by KVM in the system.  KVM's tick() is disabled and the
> simulation does not enter into KVM until all outstanding timing requests have
> completed.  The main motivation for this is to allow KVM CPU to perform MMIO
> in Ruby, since Ruby does not support atomic accesses.
> 
> 
> Diffs
> -
> 
>   src/cpu/kvm/x86_cpu.cc 91f58918a76abf1a1dedcaa70a9b95789da7b88c 
>   src/cpu/kvm/base.hh 91f58918a76abf1a1dedcaa70a9b95789da7b88c 
>   src/cpu/kvm/base.cc 91f58918a76abf1a1dedcaa70a9b95789da7b88c 
> 
> Diff: http://reviews.gem5.org/r/3619/diff/
> 
> 
> Testing
> ---
> 
> 
> Thanks,
> 
> Michael LeBeane
> 
>

___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


Re: [gem5-dev] Review Request 3619: kvm: Support timing accesses for KVM cpu

2016-08-22 Thread Andreas Sandberg


> On Aug. 21, 2016, 4:29 a.m., Michael LeBeane wrote:
> > Well, having played around with this solution for KVM MMIO + Ruby a bit it 
> > does work (with the addition of a timing_noncacheable state as Andreas S. 
> > noted), but not very well.  If you put the system in timing mode you get 
> > events like DRAM refresh that make it so you can't stay in KVM very long, 
> > which kinda defeats the purpose. Any non-hacky ideas how to get around this?

This is an unfortunate side-effect of using KVM. A display processor would 
cause the same type of issues (you'd get events at least once per refresh, but 
possibly once per N pixels). There are basically two high-level solutions: 

   1. Don't issue frequent events when running in KVM mode. I have been 
considering this for the HDLCD. If running in *_noncacheable, we'd just reduce 
simulation fidelity to get events down to something manageable.
   2. Run KVM in a separate thread similar to when simulating a multi-core 
system using KVM. This allows you to put devices in one event queue and each of 
the simulated KVM cores in separate event queues and control when the queues 
are synchronised.

In this particular case, I think 2 sounds like a reasonable solution since you 
presumably want good timing fidelity for the GPU. Synchronisation is going to 
be "interesting", but the KVM CPU should be able to cope with being in its own 
thread. Communication should only really happen when handling MMIOs and 
interrupts, which already support synchronisation. I have something along these 
lines in my KVM script to map CPUs to threads:

```python
root.sim_quantum=m5.ticks.fromSeconds(options.quantum * 1E-3)

# Assign independent event queues (threads) to the KVM CPUs,
# event queue 0 is reserved for simulated devices.
for idx, cpu in enumerate(system.cpu):
    # Child objects usually inherit the parent's event
    # queue. Override that and use queue 0 instead.
    for obj in cpu.descendants():
        obj.eventq_index = 0

    cpu.eventq_index = idx + 1
```

You might want to test the timing changes on their own in a multi-core system 
in timing_noncacheable mode to make sure that they synchronise correctly. I 
have a sneaking suspicion that they don't at the moment.


- Andreas


---
This is an automatically generated e-mail. To reply, visit:
http://reviews.gem5.org/r/3619/#review8668
---


On Aug. 21, 2016, 4:19 a.m., Michael LeBeane wrote:
> 
> ---
> This is an automatically generated e-mail. To reply, visit:
> http://reviews.gem5.org/r/3619/
> ---
> 
> (Updated Aug. 21, 2016, 4:19 a.m.)
> 
> 
> Review request for Default.
> 
> 
> Repository: gem5
> 
> 
> Description
> ---
> 
> Changeset 11561:4595cc3848fc
> ---
> kvm: Support timing accesses for KVM cpu
> This patch enables timing accesses for KVM cpu.  A new state,
> RunningMMIOPending, is added to indicate that there are outstanding timing
> requests generated by KVM in the system.  KVM's tick() is disabled and the
> simulation does not enter into KVM until all outstanding timing requests have
> completed.  The main motivation for this is to allow KVM CPU to perform MMIO
> in Ruby, since Ruby does not support atomic accesses.
> 
> 
> Diffs
> -
> 
>   src/cpu/kvm/x86_cpu.cc 91f58918a76abf1a1dedcaa70a9b95789da7b88c 
>   src/cpu/kvm/base.hh 91f58918a76abf1a1dedcaa70a9b95789da7b88c 
>   src/cpu/kvm/base.cc 91f58918a76abf1a1dedcaa70a9b95789da7b88c 
> 
> Diff: http://reviews.gem5.org/r/3619/diff/
> 
> 
> Testing
> ---
> 
> 
> Thanks,
> 
> Michael LeBeane
> 
>

___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


Re: [gem5-dev] Review Request 3619: kvm: Support timing accesses for KVM cpu

2016-08-22 Thread Andreas Sandberg

---
This is an automatically generated e-mail. To reply, visit:
http://reviews.gem5.org/r/3619/#review8669
---

Ship it!


Ship It!

- Andreas Sandberg


On Aug. 21, 2016, 4:19 a.m., Michael LeBeane wrote:
> 
> ---
> This is an automatically generated e-mail. To reply, visit:
> http://reviews.gem5.org/r/3619/
> ---
> 
> (Updated Aug. 21, 2016, 4:19 a.m.)
> 
> 
> Review request for Default.
> 
> 
> Repository: gem5
> 
> 
> Description
> ---
> 
> Changeset 11561:4595cc3848fc
> ---
> kvm: Support timing accesses for KVM cpu
> This patch enables timing accesses for KVM cpu.  A new state,
> RunningMMIOPending, is added to indicate that there are outstanding timing
> requests generated by KVM in the system.  KVM's tick() is disabled and the
> simulation does not enter into KVM until all outstanding timing requests have
> completed.  The main motivation for this is to allow KVM CPU to perform MMIO
> in Ruby, since Ruby does not support atomic accesses.
> 
> 
> Diffs
> -
> 
>   src/cpu/kvm/x86_cpu.cc 91f58918a76abf1a1dedcaa70a9b95789da7b88c 
>   src/cpu/kvm/base.hh 91f58918a76abf1a1dedcaa70a9b95789da7b88c 
>   src/cpu/kvm/base.cc 91f58918a76abf1a1dedcaa70a9b95789da7b88c 
> 
> Diff: http://reviews.gem5.org/r/3619/diff/
> 
> 
> Testing
> ---
> 
> 
> Thanks,
> 
> Michael LeBeane
> 
>

___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


Re: [gem5-dev] Fault handling issue

2016-08-22 Thread Bjoern A. Zeeb

On 19 Aug 2016, at 17:45, Rodolfo Guilherme Wottrich wrote:

Hi Rodolfo,

how did you solve this (code-wise)? I am trying to hunt down an undelivered 
Page Fault on x86 FS and another problem that only shows up as I add more 
caches (e.g., an L2), and I am just curious about all kinds of x86 problems 
other people see and their code changes to fix things.


Thanks,
Bjoern

For future reference: my problem was not that subsequent instructions would 
not squash. I missed out on the fact that the store queue's behaviour is 
asynchronous and, although the instructions had been committed many cycles 
before, the requests would still be in the store queue to be consumed by 
the cache. It was only a matter of forcefully removing the stores from the 
LSQ and it worked.

---
Rodolfo Wottrich

On Mon, Aug 15, 2016 at 5:24 PM, Rodolfo Guilherme Wottrich <rgw...@gmail.com> wrote:


Hi Fernando,

Thank you for the suggestion. Yes, I have tried that, but the problem is 
that no similar faults happen, especially in SE mode. I wonder if it may be 
the case of some squashing function call that I am missing.

Does anybody have experience with squashing instructions in the commit 
stage?


---
Rodolfo Wottrich

On Mon, Aug 8, 2016 at 10:08 AM, Fernando Endo wrote:


Hello,

Probably I can't technically help you here, but have you considered 
observing the simulator behavior when similar faults happen? For example, 
simulate a program that accesses an invalid address and enable all related 
debug flags to track it (the --debug-flags option).

Hope it helps,

--
Fernando A. Endo, Post-doc

INRIA Rennes-Bretagne Atlantique
France


2016-08-03 3:30 GMT+02:00 Rodolfo Guilherme Wottrich:



Hello,

I would like to request some assistance if possible. For my PhD work, I 
need to be able to trigger a CPU fault when a particular condition in the 
L1 cache controller is met. I am using an o3 x86 CPU and Ruby in SE mode.

I have come to a partial solution to the problem, based on a patch I found 
which dealt with a similar situation. It involves creating a new Sequencer 
callback function that is used only in that specific situation in the cache 
controller, and that triggers a sequence of actions which eventually lead 
to a Fault object being instantiated in the LSQ and in the commit stage of 
the pipeline.

The problem is that although the Fault and its handling are "working" 
(control flow changes and registers are updated as they should), subsequent 
requests still keep being received by the cache in the mandatory queue from 
the instructions following the offending one. Those instructions should have 
been cancelled as in a branch misprediction, and their requests should have 
been removed from the LSQ to avoid inconsistency.

Can anybody think of why I am having such a problem, and how I can solve it? 
I can provide specifics once the discussion starts.

Thank you in advance.
Cheers,

---
Rodolfo Wottrich
___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev





___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev

___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


[gem5-dev] Cron <m5test@zizzer> /z/m5/regression/do-regression quick

2016-08-22 Thread Cron Daemon
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/linux/minor-timing: passed.
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/linux/o3-timing: passed.
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/linux/simple-atomic: passed.
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/linux/simple-timing: passed.
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/linux/simple-timing-ruby: passed.
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/tru64/minor-timing: passed.
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/tru64/o3-timing: passed.
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/tru64/simple-atomic: passed.
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/tru64/simple-timing: passed.
* build/ALPHA/tests/opt/quick/se/00.hello/alpha/tru64/simple-timing-ruby: passed.
* build/ALPHA/tests/opt/quick/se/01.hello-2T-smt/alpha/linux/o3-timing-mt: passed.
* build/ALPHA/tests/opt/quick/se/03.learning-gem5/alpha/linux/learning-gem5-p1-two-level: passed.
* build/ALPHA/tests/opt/quick/se/03.learning-gem5/alpha/linux/learning-gem5-p1-simple: passed.
* build/ALPHA/tests/opt/quick/se/30.eon/alpha/tru64/simple-atomic: passed.
* build/ALPHA/tests/opt/quick/se/50.memtest/alpha/linux/memtest-ruby: passed.
* build/ALPHA/tests/opt/quick/se/50.vortex/alpha/tru64/simple-atomic: passed.
* build/ALPHA/tests/opt/quick/se/50.vortex/alpha/tru64/simple-timing: passed.
* build/ALPHA/tests/opt/quick/se/60.rubytest/alpha/linux/rubytest-ruby: passed.
* build/ALPHA/tests/opt/quick/se/70.twolf/alpha/tru64/simple-atomic: passed.
* build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-atomic: passed.
* build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing: passed.
* build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual: passed.
* build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual: passed.
* build/ALPHA/tests/opt/quick/se/70.twolf/alpha/tru64/simple-timing: passed.
* build/ALPHA/tests/opt/quick/fs/80.netperf-stream/alpha/linux/twosys-tsunami-simple-atomic: passed.
* build/ALPHA_MOESI_hammer/tests/opt/quick/se/00.hello/alpha/linux/simple-timing-ruby-MOESI_hammer: passed.
* build/ALPHA_MOESI_hammer/tests/opt/quick/se/00.hello/alpha/tru64/simple-timing-ruby-MOESI_hammer: passed.
* build/ALPHA_MOESI_hammer/tests/opt/quick/se/60.rubytest/alpha/linux/rubytest-ruby-MOESI_hammer: passed.
* build/ALPHA_MOESI_hammer/tests/opt/quick/se/50.memtest/alpha/linux/memtest-ruby-MOESI_hammer: passed.
* build/ALPHA_MESI_Two_Level/tests/opt/quick/se/00.hello/alpha/linux/simple-timing-ruby-MESI_Two_Level: passed.
* build/ALPHA_MESI_Two_Level/tests/opt/quick/se/00.hello/alpha/tru64/simple-timing-ruby-MESI_Two_Level: passed.
* build/ALPHA_MESI_Two_Level/tests/opt/quick/se/50.memtest/alpha/linux/memtest-ruby-MESI_Two_Level: passed.
* build/ALPHA_MESI_Two_Level/tests/opt/quick/se/60.rubytest/alpha/linux/rubytest-ruby-MESI_Two_Level: passed.
* build/ALPHA_MOESI_CMP_directory/tests/opt/quick/se/00.hello/alpha/linux/simple-timing-ruby-MOESI_CMP_directory: passed.
* build/ALPHA_MOESI_CMP_directory/tests/opt/quick/se/00.hello/alpha/tru64/simple-timing-ruby-MOESI_CMP_directory: passed.
* build/ALPHA_MOESI_CMP_directory/tests/opt/quick/se/50.memtest/alpha/linux/memtest-ruby-MOESI_CMP_directory: passed.
* build/ALPHA_MOESI_CMP_directory/tests/opt/quick/se/60.rubytest/alpha/linux/rubytest-ruby-MOESI_CMP_directory: passed.
* build/ALPHA_MOESI_CMP_token/tests/opt/quick/se/00.hello/alpha/linux/simple-timing-ruby-MOESI_CMP_token: passed.
* build/ALPHA_MOESI_CMP_token/tests/opt/quick/se/00.hello/alpha/tru64/simple-timing-ruby-MOESI_CMP_token: passed.
* build/ALPHA_MOESI_CMP_token/tests/opt/quick/se/60.rubytest/alpha/linux/rubytest-ruby-MOESI_CMP_token: passed.
* build/ALPHA_MOESI_CMP_token/tests/opt/quick/se/50.memtest/alpha/linux/memtest-ruby-MOESI_CMP_token: passed.
* build/MIPS/tests/opt/quick/se/00.hello/mips/linux/o3-timing: passed.
* build/MIPS/tests/opt/quick/se/00.hello/mips/linux/simple-atomic: passed.
* build/MIPS/tests/opt/quick/se/00.hello/mips/linux/simple-timing: passed.
* build/MIPS/tests/opt/quick/se/00.hello/mips/linux/simple-timing-ruby: passed.
* build/MIPS/tests/opt/quick/se/03.learning-gem5/mips/linux/learning-gem5-p1-simple: passed.
* build/MIPS/tests/opt/quick/se/03.learning-gem5/mips/linux/learning-gem5-p1-two-level: passed.
* build/NULL/tests/opt/quick/se/50.memtest/null/none/memtest: passed.
* build/NULL/tests/opt/quick/se/50.memtest/null/none/memtest-filter: passed.
* build/NULL/tests/opt/quick/se/70.tgen/null/none/tgen-dram-ctrl: passed.
* build/NULL/tests/opt/quick/se/70.tgen/null/none/tgen-simple-mem: passed.
* build/NULL/tests/opt/quick/se/51.memcheck/null/none/memcheck: passed.
*