# HG changeset patch
# User Derek Hower <[email protected]>
# Date 1263942696 21600
# Node ID 21fbf0412e0d63246d1070d29f0f338ba62394eb
# Parent 3d308cbd16572d4b672fb01e7ad1ad61591b7831
ruby: new atomics implementation
This patch changes the way that Ruby handles atomic RMW instructions. This
implementation, unlike the prior one, is protocol independent. It works by
locking an address from the sequencer immediately after the read portion of an
RMW completes. When that address is locked, the coherence controller will only
satisfy requests coming from one port (e.g., the mandatory queue) and will
ignore all others. After the write portion completes, the line is unlocked.
This should also work with multi-line atomics, as long as the blocks are always
acquired in the same order.
diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm
b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
--- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
@@ -238,7 +238,7 @@
// Response IntraChip L1 Network - response msg to this L1 cache
in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache) {
if (responseIntraChipL1Network_in.isReady()) {
- peek(responseIntraChipL1Network_in, ResponseMsg) {
+ peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
trigger(Event:Data_Exclusive, in_msg.Address);
@@ -271,7 +271,7 @@
// Request InterChip network - request from this L1 cache to the shared L2
in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache) {
if(requestIntraChipL1Network_in.isReady()) {
- peek(requestIntraChipL1Network_in, RequestMsg) {
+ peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
if (in_msg.Type == CoherenceRequestType:INV) {
trigger(Event:Inv, in_msg.Address);
@@ -292,7 +292,7 @@
// Mandatory Queue betweens Node's CPU and it's L1 caches
in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
if (mandatoryQueue_in.isReady()) {
- peek(mandatoryQueue_in, CacheMsg) {
+ peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
// Check for data access to blocks in I-cache and ifetchs to blocks in
D-cache
diff --git a/src/mem/protocol/MI_example-cache.sm
b/src/mem/protocol/MI_example-cache.sm
--- a/src/mem/protocol/MI_example-cache.sm
+++ b/src/mem/protocol/MI_example-cache.sm
@@ -138,7 +138,7 @@
in_port(forwardRequestNetwork_in, RequestMsg, forwardToCache) {
if (forwardRequestNetwork_in.isReady()) {
- peek(forwardRequestNetwork_in, RequestMsg) {
+ peek(forwardRequestNetwork_in, RequestMsg, block_on="Address") {
if (in_msg.Type == CoherenceRequestType:GETX) {
trigger(Event:Fwd_GETX, in_msg.Address);
}
@@ -160,7 +160,7 @@
in_port(responseNetwork_in, ResponseMsg, responseToCache) {
if (responseNetwork_in.isReady()) {
- peek(responseNetwork_in, ResponseMsg) {
+ peek(responseNetwork_in, ResponseMsg, block_on="Address") {
if (in_msg.Type == CoherenceResponseType:DATA) {
trigger(Event:Data, in_msg.Address);
}
@@ -174,7 +174,7 @@
// Mandatory Queue
in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
if (mandatoryQueue_in.isReady()) {
- peek(mandatoryQueue_in, CacheMsg) {
+ peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
if (cacheMemory.isTagPresent(in_msg.LineAddress) == false &&
diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
--- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
@@ -306,7 +306,7 @@
// Request Network
in_port(requestNetwork_in, RequestMsg, requestToL1Cache) {
if (requestNetwork_in.isReady()) {
- peek(requestNetwork_in, RequestMsg) {
+ peek(requestNetwork_in, RequestMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
DEBUG_EXPR("MRM_DEBUG: L1 received");
DEBUG_EXPR(in_msg.Type);
@@ -338,7 +338,7 @@
// Response Network
in_port(responseToL1Cache_in, ResponseMsg, responseToL1Cache) {
if (responseToL1Cache_in.isReady()) {
- peek(responseToL1Cache_in, ResponseMsg) {
+ peek(responseToL1Cache_in, ResponseMsg, block_on="Address") {
if (in_msg.Type == CoherenceResponseType:ACK) {
trigger(Event:Ack, in_msg.Address);
} else if (in_msg.Type == CoherenceResponseType:DATA) {
@@ -356,7 +356,7 @@
// Mandatory Queue betweens Node's CPU and it's L1 caches
in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
if (mandatoryQueue_in.isReady()) {
- peek(mandatoryQueue_in, CacheMsg) {
+ peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
// Check for data access to blocks in I-cache and ifetchs to blocks in
D-cache
diff --git a/src/mem/ruby/buffers/MessageBuffer.hh
b/src/mem/ruby/buffers/MessageBuffer.hh
--- a/src/mem/ruby/buffers/MessageBuffer.hh
+++ b/src/mem/ruby/buffers/MessageBuffer.hh
@@ -64,6 +64,11 @@
(m_prio_heap.peekMin().m_time <= g_eventQueue_ptr->getTime()));
}
+ void delayHead() {
+ MessageBufferNode node = m_prio_heap.extractMin();
+ enqueue(node.m_msgptr, 1);
+ }
+
bool areNSlotsAvailable(int n);
int getPriority() { return m_priority_rank; }
void setPriority(int rank) { m_priority_rank = rank; }
diff --git a/src/mem/ruby/config/MI_example-homogeneous.rb
b/src/mem/ruby/config/MI_example-homogeneous.rb
--- a/src/mem/ruby/config/MI_example-homogeneous.rb
+++ b/src/mem/ruby/config/MI_example-homogeneous.rb
@@ -13,7 +13,7 @@
# default values
num_cores = 2
-l1_cache_size_kb = 32768
+l1_cache_size_bytes = 32768
l1_cache_assoc = 8
l1_cache_latency = 1
num_memories = 2
diff --git a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
--- a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
+++ b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
@@ -68,8 +68,8 @@
require protocol+".rb"
num_cores.times { |n|
- icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb,
l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
- dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb,
l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
+ icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb*1024,
l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
+ dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb*1024,
l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
sequencer = Sequencer.new("Sequencer_"+n.to_s, icache, dcache)
iface_ports << sequencer
if protocol == "MOESI_CMP_directory"
@@ -87,7 +87,7 @@
end
}
num_l2_banks.times { |n|
- cache = SetAssociativeCache.new("l2u_"+n.to_s,
l2_cache_size_kb/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
+ cache = SetAssociativeCache.new("l2u_"+n.to_s,
(l2_cache_size_kb*1024)/num_l2_banks, l2_cache_latency, l2_cache_assoc,
"PSEUDO_LRU")
if protocol == "MOESI_CMP_directory"
net_ports <<
MOESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
"L2Cache",
diff --git a/src/mem/ruby/config/cfg.rb b/src/mem/ruby/config/cfg.rb
--- a/src/mem/ruby/config/cfg.rb
+++ b/src/mem/ruby/config/cfg.rb
@@ -385,12 +385,12 @@
end
class Cache < LibRubyObject
- param :size_kb, Integer
+ param :size, Integer
param :latency, Integer
param :controller, NetPort
- def initialize(obj_name, size_kb, latency)
+ def initialize(obj_name, size, latency)
super(obj_name)
- self.size_kb = size_kb
+ self.size = size
self.latency = latency
# controller must be set manually by the configuration script
# because there is a cyclic dependence
@@ -406,8 +406,8 @@
# when an integer, it represents the number of cycles for a hit
# when a float, it represents the cache access time in ns
# when set to "auto", libruby will attempt to find a realistic latency by
running CACTI
- def initialize(obj_name, size_kb, latency, assoc, replacement_policy)
- super(obj_name, size_kb, latency)
+ def initialize(obj_name, size, latency, assoc, replacement_policy)
+ super(obj_name, size, latency)
self.assoc = assoc
self.replacement_policy = replacement_policy
end
@@ -415,7 +415,7 @@
def calculateLatency()
if self.latency == "auto"
cacti_args = Array.new()
- cacti_args << (self.size_kb*1024) << RubySystem.block_size_bytes <<
self.assoc
+ cacti_args << (self.size*1024) << RubySystem.block_size_bytes <<
self.assoc
cacti_args << 1 << 0 << 0 << 0 << 1
cacti_args << RubySystem.tech_nm << RubySystem.block_size_bytes*8
cacti_args << 0 << 0 << 0 << 1 << 0 << 0 << 0 << 0 << 1
diff --git a/src/mem/ruby/libruby.cc b/src/mem/ruby/libruby.cc
--- a/src/mem/ruby/libruby.cc
+++ b/src/mem/ruby/libruby.cc
@@ -58,11 +58,8 @@
ostream& operator<<(ostream& out, const RubyRequestType& obj)
{
- cerr << "in op" << endl;
out << RubyRequestType_to_string(obj);
- cerr << "flushing" << endl;
out << flush;
- cerr << "done" << endl;
return out;
}
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh
b/src/mem/ruby/slicc_interface/AbstractController.hh
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -21,9 +21,8 @@
virtual const string toString() const = 0; // returns text version of
controller type
virtual const string getName() const = 0; // return instance name
virtual const MachineType getMachineType() const = 0;
- virtual void set_atomic(Address addr) = 0;
- virtual void clear_atomic(Address addr) = 0;
- virtual void reset_atomics() = 0;
+ virtual void blockOnQueue(Address, MessageBuffer*) = 0;
+ virtual void unblock(Address) = 0;
virtual void print(ostream & out) const = 0;
virtual void printStats(ostream & out) const = 0;
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -66,8 +66,6 @@
m_instCache_ptr = NULL;
m_dataCache_ptr = NULL;
m_controller = NULL;
- m_atomic_reads = 0;
- m_atomic_writes = 0;
for (size_t i=0; i<argv.size(); i+=2) {
if ( argv[i] == "controller") {
m_controller = RubySystem::getController(argv[i+1]); // args[i] =
"L1Cache"
@@ -285,15 +283,15 @@
(request->ruby_request.type == RubyRequestType_RMW_Write) ||
(request->ruby_request.type == RubyRequestType_Locked_Read) ||
(request->ruby_request.type == RubyRequestType_Locked_Write));
- // POLINA: the assumption is that atomics are only on data cache and not
instruction cache
+
if (request->ruby_request.type == RubyRequestType_Locked_Read) {
m_dataCache_ptr->setLocked(address, m_version);
}
else if (request->ruby_request.type == RubyRequestType_RMW_Read) {
- m_controller->set_atomic(address);
+ m_controller->blockOnQueue(address, m_mandatory_q_ptr);
}
else if (request->ruby_request.type == RubyRequestType_RMW_Write) {
- m_controller->clear_atomic(address);
+ m_controller->unblock(address);
}
hitCallback(request, data);
@@ -438,42 +436,12 @@
CacheRequestType ctype;
switch(request.type) {
case RubyRequestType_IFETCH:
- if (m_atomic_reads > 0 && m_atomic_writes == 0) {
- m_controller->reset_atomics();
- m_atomic_writes = 0;
- m_atomic_reads = 0;
- }
- else if (m_atomic_writes > 0) {
- assert(m_atomic_reads > m_atomic_writes);
- cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but
only received: " << m_atomic_writes << endl;
- assert(false);
- }
ctype = CacheRequestType_IFETCH;
break;
case RubyRequestType_LD:
- if (m_atomic_reads > 0 && m_atomic_writes == 0) {
- m_controller->reset_atomics();
- m_atomic_writes = 0;
- m_atomic_reads = 0;
- }
- else if (m_atomic_writes > 0) {
- assert(m_atomic_reads > m_atomic_writes);
- cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but
only received: " << m_atomic_writes << endl;
- assert(false);
- }
ctype = CacheRequestType_LD;
break;
case RubyRequestType_ST:
- if (m_atomic_reads > 0 && m_atomic_writes == 0) {
- m_controller->reset_atomics();
- m_atomic_writes = 0;
- m_atomic_reads = 0;
- }
- else if (m_atomic_writes > 0) {
- assert(m_atomic_reads > m_atomic_writes);
- cerr << "WARNING: Expected: " << m_atomic_reads << " RMW_Writes, but
only received: " << m_atomic_writes << endl;
- assert(false);
- }
ctype = CacheRequestType_ST;
break;
case RubyRequestType_Locked_Read:
@@ -481,18 +449,9 @@
ctype = CacheRequestType_ATOMIC;
break;
case RubyRequestType_RMW_Read:
- assert(m_atomic_writes == 0);
- m_atomic_reads++;
ctype = CacheRequestType_ATOMIC;
break;
case RubyRequestType_RMW_Write:
- assert(m_atomic_reads > 0);
- assert(m_atomic_writes < m_atomic_reads);
- m_atomic_writes++;
- if (m_atomic_reads == m_atomic_writes) {
- m_atomic_reads = 0;
- m_atomic_writes = 0;
- }
ctype = CacheRequestType_ATOMIC;
break;
default:
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -128,8 +128,6 @@
// Global outstanding request count, across all request tables
int m_outstanding_count;
bool m_deadlock_check_scheduled;
- int m_atomic_reads;
- int m_atomic_writes;
int m_store_waiting_on_load_cycles;
int m_store_waiting_on_store_cycles;
diff --git a/src/mem/slicc/ast/PeekStatementAST.py
b/src/mem/slicc/ast/PeekStatementAST.py
--- a/src/mem/slicc/ast/PeekStatementAST.py
+++ b/src/mem/slicc/ast/PeekStatementAST.py
@@ -29,8 +29,8 @@
from slicc.symbols import Var
class PeekStatementAST(StatementAST):
- def __init__(self, slicc, queue_name, type_ast, statements, method):
- super(PeekStatementAST, self).__init__(slicc)
+ def __init__(self, slicc, queue_name, type_ast, pairs, statements, method):
+ super(PeekStatementAST, self).__init__(slicc, pairs)
self.queue_name = queue_name
self.type_ast = type_ast
@@ -63,6 +63,17 @@
in_msg_ptr = dynamic_cast<const $mtid *>(($qcode).${{self.method}}());
assert(in_msg_ptr != NULL);
''')
+ if self.pairs.has_key("block_on"):
+ address_field = self.pairs['block_on']
+ code('''
+ if ( (m_is_blocking == true) &&
+ (m_block_map.count(in_msg_ptr->m_$address_field) == 1) ) {
+ if (m_block_map[in_msg_ptr->m_$address_field] != &$qcode) {
+ $qcode.delayHead();
+ continue;
+ }
+ }
+ ''')
# The other statements
self.statements.generate(code, return_type)
diff --git a/src/mem/slicc/parser.py b/src/mem/slicc/parser.py
--- a/src/mem/slicc/parser.py
+++ b/src/mem/slicc/parser.py
@@ -514,8 +514,8 @@
p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[6], p[8])
def p_statement__peek(self, p):
- "statement : PEEK '(' var ',' type ')' statements"
- p[0] = ast.PeekStatementAST(self, p[3], p[5], p[7], "peek")
+ "statement : PEEK '(' var ',' type pairs ')' statements"
+ p[0] = ast.PeekStatementAST(self, p[3], p[5], p[6], p[8], "peek")
def p_statement__copy_head(self, p):
"statement : COPY_HEAD '(' var ',' var pairs ')' SEMI"
diff --git a/src/mem/slicc/symbols/StateMachine.py
b/src/mem/slicc/symbols/StateMachine.py
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -185,11 +185,10 @@
void print(ostream& out) const;
void printConfig(ostream& out) const;
void wakeup();
- void set_atomic(Address addr);
- void clear_atomic(Address addr);
- void reset_atomics();
void printStats(ostream& out) const { s_profiler.dumpStats(out); }
void clearStats() { s_profiler.clearStats(); }
+ void blockOnQueue(Address addr, MessageBuffer* port);
+ void unblock(Address addr);
private:
''')
@@ -198,16 +197,6 @@
for param in self.config_parameters:
code('int m_${{param.ident}};')
- if self.ident == "L1Cache":
- code('''
-int servicing_atomic;
-Address locked_read_request1;
-Address locked_read_request2;
-Address locked_read_request3;
-Address locked_read_request4;
-int read_counter;
-''')
-
code('''
int m_number_of_TBEs;
@@ -221,6 +210,8 @@
NodeID m_version;
Network* m_net_ptr;
MachineID m_machineID;
+bool m_is_blocking;
+map< Address, MessageBuffer* > m_block_map;
${ident}_Profiler s_profiler;
static int m_num_controllers;
// Internal functions
@@ -297,15 +288,6 @@
{
''')
code.indent()
- if self.ident == "L1Cache":
- code('''
-servicing_atomic = 0;
-locked_read_request1 = Address(-1);
-locked_read_request2 = Address(-1);
-locked_read_request3 = Address(-1);
-locked_read_request4 = Address(-1);
-read_counter = 0;
-''')
code('m_num_controllers++;')
for var in self.objects:
@@ -515,6 +497,17 @@
return MachineType_${ident};
}
+void $c_ident::blockOnQueue(Address addr, MessageBuffer* port) {
+ m_is_blocking = true;
+ m_block_map[addr] = port;
+}
+void $c_ident::unblock(Address addr) {
+ m_block_map.erase(addr);
+ if (m_block_map.size() == 0) {
+ m_is_blocking = false;
+ }
+}
+
void $c_ident::print(ostream& out) const { out << "[$c_ident " << m_version <<
"]"; }
void $c_ident::printConfig(ostream& out) const {
@@ -580,54 +573,12 @@
# InPorts
#
- # Find the position of the mandatory queue in the vector so
- # that we can print it out first
-
- mandatory_q = None
- if self.ident == "L1Cache":
- for i,port in enumerate(self.in_ports):
- assert "c_code_in_port" in port
- if str(port).find("mandatoryQueue_in") >= 0:
- assert mandatory_q is None
- mandatory_q = port
-
- assert mandatory_q is not None
-
- # print out the mandatory queue here
- port = mandatory_q
- code('// ${ident}InPort $port')
- output = port["c_code_in_port"]
-
- code('$output')
-
for port in self.in_ports:
- # don't print out mandatory queue twice
- if port == mandatory_q:
- continue
-
- if ident == "L1Cache":
- if (str(port).find("forwardRequestNetwork_in") >= 0 or
str(port).find("requestNetwork_in") >= 0 or
str(port).find("requestIntraChipL1Network_in") >= 0):
- code('''
-bool postpone = false;
-if ((((*m_L1Cache_forwardToCache_ptr)).isReady())) {
- const RequestMsg* in_msg_ptr;
- in_msg_ptr = dynamic_cast<const
RequestMsg*>(((*m_L1Cache_forwardToCache_ptr)).peek());
- if ((((servicing_atomic > 0) && (locked_read_request1 ==
((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address
|| locked_read_request3 == ((*in_msg_ptr)).m_Address || locked_read_request1 ==
((*in_msg_ptr)).m_Address)))) {
- postpone = true;
- }
-}
-if (!postpone) {
-''')
code.indent()
code('// ${ident}InPort $port')
code('${{port["c_code_in_port"]}}')
code.dedent()
- if ident == "L1Cache":
- if (str(port).find("forwardRequestNetwork_in") >= 0 or
str(port).find("requestNetwork_in") >= 0 or
str(port).find("requestIntraChipL1Network_in") >= 0):
- code.dedent()
- code('}')
- code.indent()
code('')
code.dedent()
@@ -638,83 +589,6 @@
}
''')
- if self.ident == "L1Cache":
- code('''
-void ${ident}_Controller::set_atomic(Address addr)
-{
- servicing_atomic++;
- switch (servicing_atomic) {
- case(1):
- assert(locked_read_request1 == Address(-1));
- locked_read_request1 = addr;
- break;
- case(2):
- assert(locked_read_request2 == Address(-1));
- locked_read_request2 = addr;
- break;
- case(3):
- assert(locked_read_request3 == Address(-1));
- locked_read_request3 = addr;
- break;
- case(4):
- assert(locked_read_request4 == Address(-1));
- locked_read_request4 = addr;
- break;
- default:
- assert(0);
-
- }
-}
-
-void ${ident}_Controller::clear_atomic(Address addr)
-{
-
- assert(servicing_atomic > 0);
- if (addr == locked_read_request1)
- locked_read_request1 = Address(-1);
- else if (addr == locked_read_request2)
- locked_read_request2 = Address(-1);
- else if (addr == locked_read_request3)
- locked_read_request3 = Address(-1);
- else if (addr == locked_read_request4)
- locked_read_request4 = Address(-1);
- else
- assert(0);
- servicing_atomic--;
-
-}
-
-void ${ident}_Controller::reset_atomics()
-{
-
- servicing_atomic = 0;
- locked_read_request1 = Address(-1);
- locked_read_request2 = Address(-1);
- locked_read_request3 = Address(-1);
- locked_read_request4 = Address(-1);
-
-}
-
-''')
- else:
- code('''
-void ${ident}_Controller::reset_atomics()
-{
- assert(0);
-}
-
-void ${ident}_Controller::set_atomic(Address addr)
-{
- assert(0);
-}
-
-void ${ident}_Controller::clear_atomic(Address addr)
-{
- assert(0);
-}
-''')
-
-
code.write(path, "%s_Wakeup.cc" % self.ident)
def printCSwitch(self, path):
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev