[m5-dev] changeset in m5: ruby: added probe filter support to hammer

Brad Beckmann Fri, 20 Aug 2010 17:46:33 -0700

changeset 3559d47839a1 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=3559d47839a1
description:
        ruby: added probe filter support to hammer


diffstat:

 configs/ruby/MOESI_CMP_token.py        |    8 +-
 configs/ruby/MOESI_hammer.py           |   41 ++-
 src/mem/protocol/MOESI_hammer-cache.sm |   91 ++++-
 src/mem/protocol/MOESI_hammer-dir.sm   |  559 +++++++++++++++++++++++++++++---
 src/mem/protocol/MOESI_hammer-msg.sm   |    7 +-
 src/mem/ruby/system/Cache.py           |    1 +
 src/mem/ruby/system/CacheMemory.cc     |    5 +-
 src/mem/ruby/system/CacheMemory.hh     |    1 +
 8 files changed, 614 insertions(+), 99 deletions(-)

diffs (truncated from 1449 to 300 lines):

diff -r 406e98960def -r 3559d47839a1 configs/ruby/MOESI_CMP_token.py
--- a/configs/ruby/MOESI_CMP_token.py   Fri Aug 20 11:46:13 2010 -0700
+++ b/configs/ruby/MOESI_CMP_token.py   Fri Aug 20 11:46:14 2010 -0700
@@ -81,6 +81,7 @@
     # Must create the individual controllers before the network to ensure the
     # controller constructors are called before the network constructor
     #
+    l2_bits = int(math.log(options.num_l2caches, 2))
     
     for i in xrange(options.num_cpus):
         #
@@ -104,9 +105,7 @@
                                       sequencer = cpu_seq,
                                       L1IcacheMemory = l1i_cache,
                                       L1DcacheMemory = l1d_cache,
-                                      l2_select_num_bits = \
-                                        math.log(options.num_l2caches,
-                                                 2),
+                                      l2_select_num_bits = l2_bits,
                                       N_tokens = n_tokens,
                                       retry_threshold = \
                                         options.l1_retries,
@@ -129,7 +128,8 @@
         # First create the Ruby objects associated with this cpu
         #
         l2_cache = L2Cache(size = options.l2_size,
-                           assoc = options.l2_assoc)
+                           assoc = options.l2_assoc,
+                           start_index_bit = l2_bits)
 
         l2_cntrl = L2Cache_Controller(version = i,
                                       L2cacheMemory = l2_cache,
diff -r 406e98960def -r 3559d47839a1 configs/ruby/MOESI_hammer.py
--- a/configs/ruby/MOESI_hammer.py      Fri Aug 20 11:46:13 2010 -0700
+++ b/configs/ruby/MOESI_hammer.py      Fri Aug 20 11:46:14 2010 -0700
@@ -27,6 +27,7 @@
 #
 # Authors: Brad Beckmann
 
+import math
 import m5
 from m5.objects import *
 from m5.defines import buildEnv
@@ -43,10 +44,18 @@
 class L2Cache(RubyCache):
     latency = 10
 
+#
+# Probe filter is a cache, latency is not used
+#
+class ProbeFilter(RubyCache):
+    latency = 1
+
 def define_options(parser):
     parser.add_option("--allow-atomic-migration", action="store_true",
           help="allow migratory sharing for atomic only accessed blocks")
-
+    parser.add_option("--pf-on", action="store_true",
+          help="Hammer: enable Probe Filter")
+    
 def create_system(options, system, piobus, dma_devices):
     
     if buildEnv['PROTOCOL'] != 'MOESI_hammer':
@@ -107,6 +116,29 @@
                       long(system.physmem.range.first) + 1
     mem_module_size = phys_mem_size / options.num_dirs
 
+    #
+    # determine size and index bits for probe filter
+    # By default, the probe filter size is configured to be twice the
+    # size of the L2 cache.
+    #
+    pf_size = MemorySize(options.l2_size)
+    pf_size.value = pf_size.value * 2
+    dir_bits = int(math.log(options.num_dirs, 2))
+    pf_bits = int(math.log(pf_size.value, 2))
+    if options.numa_high_bit:
+        if options.numa_high_bit > 0:
+            # if numa high bit explicitly set, make sure it does not overlap
+            # with the probe filter index
+            assert(options.numa_high_bit - dir_bits > pf_bits)
+
+        # set the probe filter start bit to just above the block offset
+        pf_start_bit = 6
+    else:
+        if dir_bits > 0:
+            pf_start_bit = dir_bits + 5
+        else:
+            pf_start_bit = 6
+
     for i in xrange(options.num_dirs):
         #
         # Create the Ruby objects associated with the directory controller
@@ -117,6 +149,8 @@
         dir_size = MemorySize('0B')
         dir_size.value = mem_module_size
 
+        pf = ProbeFilter(size = pf_size, assoc = 4)
+
         dir_cntrl = Directory_Controller(version = i,
                                          directory = \
                                          RubyDirectoryMemory( \
@@ -125,7 +159,10 @@
                                                     use_map = options.use_map,
                                                     map_levels = \
                                                     options.map_levels),
-                                         memBuffer = mem_cntrl)
+                                         probeFilter = pf,
+                                         memBuffer = mem_cntrl,
+                                         probe_filter_enabled = \
+                                           options.pf_on)
 
         exec("system.dir_cntrl%d = dir_cntrl" % i)
         dir_cntrl_nodes.append(dir_cntrl)
diff -r 406e98960def -r 3559d47839a1 src/mem/protocol/MOESI_hammer-cache.sm
--- a/src/mem/protocol/MOESI_hammer-cache.sm    Fri Aug 20 11:46:13 2010 -0700
+++ b/src/mem/protocol/MOESI_hammer-cache.sm    Fri Aug 20 11:46:14 2010 -0700
@@ -96,6 +96,7 @@
     Other_GETX,      desc="A GetX from another processor";
     Other_GETS,      desc="A GetS from another processor";
     Other_GETS_No_Mig, desc="A GetS from another processor";
+    Invalidate,      desc="Invalidate block";
 
     // Responses
     Ack,             desc="Received an ack message";
@@ -292,6 +293,8 @@
           } else {
             trigger(Event:Other_GETS, in_msg.Address);
           }
+        } else if (in_msg.Type == CoherenceRequestType:INV) {
+          trigger(Event:Invalidate, in_msg.Address);
         } else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
           trigger(Event:Writeback_Ack, in_msg.Address);
         } else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
@@ -445,7 +448,11 @@
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.DataBlk := getCacheEntry(address).DataBlk;
         out_msg.Dirty := getCacheEntry(address).Dirty;
-        out_msg.Acks := 2;
+        if (in_msg.DirectedProbe) {
+          out_msg.Acks := machineCount(MachineType:L1Cache);
+        } else {
+          out_msg.Acks := 2;
+        }
         out_msg.MessageSize := MessageSizeType:Response_Data;
       }
     }
@@ -470,7 +477,11 @@
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.DataBlk := getCacheEntry(address).DataBlk;
         out_msg.Dirty := getCacheEntry(address).Dirty;
-        out_msg.Acks := 2;
+        if (in_msg.DirectedProbe) {
+          out_msg.Acks := machineCount(MachineType:L1Cache);
+        } else {
+          out_msg.Acks := 2;
+        }
         out_msg.MessageSize := MessageSizeType:Response_Data;
       }
     }
@@ -484,8 +495,13 @@
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.DataBlk := getCacheEntry(address).DataBlk;
+        DEBUG_EXPR(out_msg.DataBlk);
         out_msg.Dirty := getCacheEntry(address).Dirty;
-        out_msg.Acks := 2;
+        if (in_msg.DirectedProbe) {
+          out_msg.Acks := machineCount(MachineType:L1Cache);
+        } else {
+          out_msg.Acks := 2;
+        }
         out_msg.MessageSize := MessageSizeType:Response_Data;
       }
     }
@@ -499,6 +515,7 @@
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.Acks := 1;
+        assert(in_msg.DirectedProbe == false);
         out_msg.MessageSize := MessageSizeType:Response_Control;
       }
     }
@@ -512,6 +529,7 @@
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
         out_msg.Acks := 1;
+        assert(in_msg.DirectedProbe == false);
         out_msg.MessageSize := MessageSizeType:Response_Control;
       }
     }
@@ -527,6 +545,26 @@
     }
   }
 
+  action(gm_sendUnblockM, "gm", desc="Send unblock to memory and indicate M/O/E state") {
+    enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      out_msg.Address := address;
+      out_msg.Type := CoherenceResponseType:UNBLOCKM;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+    }
+  }
+
+  action(gs_sendUnblockS, "gs", desc="Send unblock to memory and indicate S state") {
+    enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
+      out_msg.Address := address;
+      out_msg.Type := CoherenceResponseType:UNBLOCKS;
+      out_msg.Sender := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Unblock_Control;
+    }
+  }
+
   action(h_load_hit, "h", desc="Notify sequencer the load completed.") {
     DEBUG_EXPR(getCacheEntry(address).DataBlk);
 
@@ -653,9 +691,14 @@
         out_msg.Type := CoherenceResponseType:DATA;
         out_msg.Sender := machineID;
         out_msg.Destination.add(in_msg.Requestor);
+        DEBUG_EXPR(out_msg.Destination);
         out_msg.DataBlk := TBEs[address].DataBlk;
         out_msg.Dirty := TBEs[address].Dirty;
-        out_msg.Acks := 2;
+        if (in_msg.DirectedProbe) {
+          out_msg.Acks := machineCount(MachineType:L1Cache);
+        } else {
+          out_msg.Acks := 2;
+        }
         out_msg.MessageSize := MessageSizeType:Response_Data;
       }
     }
@@ -719,9 +762,11 @@
 
   action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") {
     peek(responseToCache_in, ResponseMsg) {
+      DEBUG_EXPR(getCacheEntry(address).DataBlk);
+      DEBUG_EXPR(in_msg.DataBlk);
       assert(getCacheEntry(address).DataBlk == in_msg.DataBlk);
       getCacheEntry(address).DataBlk := in_msg.DataBlk;
-      getCacheEntry(address).Dirty := in_msg.Dirty;
+      getCacheEntry(address).Dirty := in_msg.Dirty || getCacheEntry(address).Dirty;
     }
   }
   
@@ -813,7 +858,7 @@
     zz_recycleMandatoryQueue;
   }
 
-  transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
+  transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
     // stall
   }
 
@@ -963,7 +1008,7 @@
     rr_deallocateL2CacheBlock;
   }
 
-  transition(I, {Other_GETX, Other_GETS, Other_GETS_No_Mig}) {
+  transition(I, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
     f_sendAck;
     l_popForwardQueue;
   }
@@ -985,7 +1030,7 @@
     rr_deallocateL2CacheBlock;
   }
 
-  transition(S, Other_GETX, I) {
+  transition(S, {Other_GETX, Invalidate}, I) {
     f_sendAck;
     l_popForwardQueue;
   }
@@ -1015,7 +1060,7 @@
     rr_deallocateL2CacheBlock;
   }
 
-  transition(O, Other_GETX, I) {
+  transition(O, {Other_GETX, Invalidate}, I) {
     e_sendData;
     l_popForwardQueue;
   }
@@ -1042,7 +1087,7 @@
     rr_deallocateL2CacheBlock;
   }
 
-  transition(MM, Other_GETX, I) {
+  transition(MM, {Other_GETX, Invalidate}, I) {
     c_sendExclusiveData;
     l_popForwardQueue;
   }
@@ -1074,7 +1119,7 @@
     rr_deallocateL2CacheBlock;
   }
 
-  transition(M, Other_GETX, I) {
+  transition(M, {Other_GETX, Invalidate}, I) {
     c_sendExclusiveData;
     l_popForwardQueue;
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

[m5-dev] changeset in m5: ruby: added probe filter support to hammer

Reply via email to