changeset 3559d47839a1 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=3559d47839a1
description:
ruby: added probe filter support to hammer
diffstat:
configs/ruby/MOESI_CMP_token.py | 8 +-
configs/ruby/MOESI_hammer.py | 41 ++-
src/mem/protocol/MOESI_hammer-cache.sm | 91 ++++-
src/mem/protocol/MOESI_hammer-dir.sm | 559 +++++++++++++++++++++++++++++---
src/mem/protocol/MOESI_hammer-msg.sm | 7 +-
src/mem/ruby/system/Cache.py | 1 +
src/mem/ruby/system/CacheMemory.cc | 5 +-
src/mem/ruby/system/CacheMemory.hh | 1 +
8 files changed, 614 insertions(+), 99 deletions(-)
diffs (truncated from 1449 to 300 lines):
diff -r 406e98960def -r 3559d47839a1 configs/ruby/MOESI_CMP_token.py
--- a/configs/ruby/MOESI_CMP_token.py Fri Aug 20 11:46:13 2010 -0700
+++ b/configs/ruby/MOESI_CMP_token.py Fri Aug 20 11:46:14 2010 -0700
@@ -81,6 +81,7 @@
# Must create the individual controllers before the network to ensure the
# controller constructors are called before the network constructor
#
+ l2_bits = int(math.log(options.num_l2caches, 2))
for i in xrange(options.num_cpus):
#
@@ -104,9 +105,7 @@
sequencer = cpu_seq,
L1IcacheMemory = l1i_cache,
L1DcacheMemory = l1d_cache,
- l2_select_num_bits = \
- math.log(options.num_l2caches,
- 2),
+ l2_select_num_bits = l2_bits,
N_tokens = n_tokens,
retry_threshold = \
options.l1_retries,
@@ -129,7 +128,8 @@
# First create the Ruby objects associated with this cpu
#
l2_cache = L2Cache(size = options.l2_size,
- assoc = options.l2_assoc)
+ assoc = options.l2_assoc,
+ start_index_bit = l2_bits)
l2_cntrl = L2Cache_Controller(version = i,
L2cacheMemory = l2_cache,
diff -r 406e98960def -r 3559d47839a1 configs/ruby/MOESI_hammer.py
--- a/configs/ruby/MOESI_hammer.py Fri Aug 20 11:46:13 2010 -0700
+++ b/configs/ruby/MOESI_hammer.py Fri Aug 20 11:46:14 2010 -0700
@@ -27,6 +27,7 @@
#
# Authors: Brad Beckmann
+import math
import m5
from m5.objects import *
from m5.defines import buildEnv
@@ -43,10 +44,18 @@
class L2Cache(RubyCache):
latency = 10
+#
+# Probe filter is a cache, latency is not used
+#
+class ProbeFilter(RubyCache):
+ latency = 1
+
def define_options(parser):
parser.add_option("--allow-atomic-migration", action="store_true",
help="allow migratory sharing for atomic only accessed blocks")
-
+ parser.add_option("--pf-on", action="store_true",
+ help="Hammer: enable Probe Filter")
+
def create_system(options, system, piobus, dma_devices):
if buildEnv['PROTOCOL'] != 'MOESI_hammer':
@@ -107,6 +116,29 @@
long(system.physmem.range.first) + 1
mem_module_size = phys_mem_size / options.num_dirs
+ #
+ # determine size and index bits for probe filter
+ # By default, the probe filter size is configured to be twice the
+ # size of the L2 cache.
+ #
+ pf_size = MemorySize(options.l2_size)
+ pf_size.value = pf_size.value * 2
+ dir_bits = int(math.log(options.num_dirs, 2))
+ pf_bits = int(math.log(pf_size.value, 2))
+ if options.numa_high_bit:
+ if options.numa_high_bit > 0:
+ # if numa high bit explicitly set, make sure it does not overlap
+ # with the probe filter index
+ assert(options.numa_high_bit - dir_bits > pf_bits)
+
+ # set the probe filter start bit to just above the block offset
+ pf_start_bit = 6
+ else:
+ if dir_bits > 0:
+ pf_start_bit = dir_bits + 5
+ else:
+ pf_start_bit = 6
+
for i in xrange(options.num_dirs):
#
# Create the Ruby objects associated with the directory controller
@@ -117,6 +149,8 @@
dir_size = MemorySize('0B')
dir_size.value = mem_module_size
+ pf = ProbeFilter(size = pf_size, assoc = 4)
+
dir_cntrl = Directory_Controller(version = i,
directory = \
RubyDirectoryMemory( \
@@ -125,7 +159,10 @@
use_map = options.use_map,
map_levels = \
options.map_levels),
- memBuffer = mem_cntrl)
+ probeFilter = pf,
+ memBuffer = mem_cntrl,
+ probe_filter_enabled = \
+ options.pf_on)
exec("system.dir_cntrl%d = dir_cntrl" % i)
dir_cntrl_nodes.append(dir_cntrl)
diff -r 406e98960def -r 3559d47839a1 src/mem/protocol/MOESI_hammer-cache.sm
--- a/src/mem/protocol/MOESI_hammer-cache.sm Fri Aug 20 11:46:13 2010 -0700
+++ b/src/mem/protocol/MOESI_hammer-cache.sm Fri Aug 20 11:46:14 2010 -0700
@@ -96,6 +96,7 @@
Other_GETX, desc="A GetX from another processor";
Other_GETS, desc="A GetS from another processor";
Other_GETS_No_Mig, desc="A GetS from another processor";
+ Invalidate, desc="Invalidate block";
// Responses
Ack, desc="Received an ack message";
@@ -292,6 +293,8 @@
} else {
trigger(Event:Other_GETS, in_msg.Address);
}
+ } else if (in_msg.Type == CoherenceRequestType:INV) {
+ trigger(Event:Invalidate, in_msg.Address);
} else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
trigger(Event:Writeback_Ack, in_msg.Address);
} else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
@@ -445,7 +448,11 @@
out_msg.Destination.add(in_msg.Requestor);
out_msg.DataBlk := getCacheEntry(address).DataBlk;
out_msg.Dirty := getCacheEntry(address).Dirty;
- out_msg.Acks := 2;
+ if (in_msg.DirectedProbe) {
+ out_msg.Acks := machineCount(MachineType:L1Cache);
+ } else {
+ out_msg.Acks := 2;
+ }
out_msg.MessageSize := MessageSizeType:Response_Data;
}
}
@@ -470,7 +477,11 @@
out_msg.Destination.add(in_msg.Requestor);
out_msg.DataBlk := getCacheEntry(address).DataBlk;
out_msg.Dirty := getCacheEntry(address).Dirty;
- out_msg.Acks := 2;
+ if (in_msg.DirectedProbe) {
+ out_msg.Acks := machineCount(MachineType:L1Cache);
+ } else {
+ out_msg.Acks := 2;
+ }
out_msg.MessageSize := MessageSizeType:Response_Data;
}
}
@@ -484,8 +495,13 @@
out_msg.Sender := machineID;
out_msg.Destination.add(in_msg.Requestor);
out_msg.DataBlk := getCacheEntry(address).DataBlk;
+ DEBUG_EXPR(out_msg.DataBlk);
out_msg.Dirty := getCacheEntry(address).Dirty;
- out_msg.Acks := 2;
+ if (in_msg.DirectedProbe) {
+ out_msg.Acks := machineCount(MachineType:L1Cache);
+ } else {
+ out_msg.Acks := 2;
+ }
out_msg.MessageSize := MessageSizeType:Response_Data;
}
}
@@ -499,6 +515,7 @@
out_msg.Sender := machineID;
out_msg.Destination.add(in_msg.Requestor);
out_msg.Acks := 1;
+ assert(in_msg.DirectedProbe == false);
out_msg.MessageSize := MessageSizeType:Response_Control;
}
}
@@ -512,6 +529,7 @@
out_msg.Sender := machineID;
out_msg.Destination.add(in_msg.Requestor);
out_msg.Acks := 1;
+ assert(in_msg.DirectedProbe == false);
out_msg.MessageSize := MessageSizeType:Response_Control;
}
}
@@ -527,6 +545,26 @@
}
}
+ action(gm_sendUnblockM, "gm", desc="Send unblock to memory and indicate M/O/E state") {
+ enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceResponseType:UNBLOCKM;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ }
+ }
+
+ action(gs_sendUnblockS, "gs", desc="Send unblock to memory and indicate S state") {
+ enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceResponseType:UNBLOCKS;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Unblock_Control;
+ }
+ }
+
action(h_load_hit, "h", desc="Notify sequencer the load completed.") {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
@@ -653,9 +691,14 @@
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
out_msg.Destination.add(in_msg.Requestor);
+ DEBUG_EXPR(out_msg.Destination);
out_msg.DataBlk := TBEs[address].DataBlk;
out_msg.Dirty := TBEs[address].Dirty;
- out_msg.Acks := 2;
+ if (in_msg.DirectedProbe) {
+ out_msg.Acks := machineCount(MachineType:L1Cache);
+ } else {
+ out_msg.Acks := 2;
+ }
out_msg.MessageSize := MessageSizeType:Response_Data;
}
}
@@ -719,9 +762,11 @@
action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") {
peek(responseToCache_in, ResponseMsg) {
+ DEBUG_EXPR(getCacheEntry(address).DataBlk);
+ DEBUG_EXPR(in_msg.DataBlk);
assert(getCacheEntry(address).DataBlk == in_msg.DataBlk);
getCacheEntry(address).DataBlk := in_msg.DataBlk;
- getCacheEntry(address).Dirty := in_msg.Dirty;
+ getCacheEntry(address).Dirty := in_msg.Dirty || getCacheEntry(address).Dirty;
}
}
@@ -813,7 +858,7 @@
zz_recycleMandatoryQueue;
}
- transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
+ transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
// stall
}
@@ -963,7 +1008,7 @@
rr_deallocateL2CacheBlock;
}
- transition(I, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
+ transition(I, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
f_sendAck;
l_popForwardQueue;
}
@@ -985,7 +1030,7 @@
rr_deallocateL2CacheBlock;
}
- transition(S, Other_GETX, I) {
+ transition(S, {Other_GETX, Invalidate}, I) {
f_sendAck;
l_popForwardQueue;
}
@@ -1015,7 +1060,7 @@
rr_deallocateL2CacheBlock;
}
- transition(O, Other_GETX, I) {
+ transition(O, {Other_GETX, Invalidate}, I) {
e_sendData;
l_popForwardQueue;
}
@@ -1042,7 +1087,7 @@
rr_deallocateL2CacheBlock;
}
- transition(MM, Other_GETX, I) {
+ transition(MM, {Other_GETX, Invalidate}, I) {
c_sendExclusiveData;
l_popForwardQueue;
}
@@ -1074,7 +1119,7 @@
rr_deallocateL2CacheBlock;
}
- transition(M, Other_GETX, I) {
+ transition(M, {Other_GETX, Invalidate}, I) {
c_sendExclusiveData;
l_popForwardQueue;
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev