Hi all,
I am modeling a cache hierarchy with private L2 caches and a shared L3
cache in gem5. In my experiments (running gem5 with the classic memory
model in SE mode), I set the main simulation parameters as follows.
1 core
private L1 dcache: 32KB/8-way; icache: 32KB/4-way
private L2 cache: 256KB/8-way
shared L3 cache: 1MB/16-way
I select workloads from SPEC CPU 2K6. The first 500 million instructions
are fast-forwarded, and the next 500 million instructions are used to
warm up the caches. After that, another 500 million instructions are
simulated in detail with the O3 CPU.
I get the following simulation stats (these stats are for
'switch_cpus_1', the detailed measurement phase; the 1st column is the
benchmark name and the 2nd column is the corresponding metric value)
( A ). L1D$ miss rate:
401 0.039705
403 0.154247
410 0.107384
450 0.169413
459 0.000102
462 0.031115
471 0.060141
( B ). L2$ miss rate:
401 0.463055
403 0.900824
410 0.344414
450 0.820350
459 0.989815
462 0.997665
471 0.964760
( C ). L3$ miss rate:
401 0.334149
403 0.291530
410 0.918030
450 0.561909
459 0.970418
462 0.989723
471 0.080612
I am surprised by these statistics. Several workloads (such as 403.gcc,
459.GemsFDTD, 462.libquantum and 471.omnetpp) have very high L2 and/or
L3 cache miss rates, some very close to 100%. I am not sure whether this
is caused by my cache configuration settings or is intrinsic behavior of
the benchmarks. Please find the related cache configuration files attached.
--------------------------
config/common/CacheConfig.py-----------------------------------------
if options.l3cache:
system.l3 = l3_cache_class(clock=options.clock,
size=options.l3_size,
assoc=options.l3_assoc,
block_size=options.cacheline_size)
system.tol3bus = CoherentBus(clock = options.clock, width = 32)
system.l3.cpu_side = system.tol3bus.master
system.l3.mem_side = system.membus.slave
else:
if options.l2cache:
system.l2 = l2_cache_class(clock=options.clock,
size=options.l2_size,
assoc=options.l2_assoc,
block_size=options.cacheline_size)
system.tol2bus = CoherentBus(clock = options.clock, width = 32)
system.l2.cpu_side = system.tol2bus.master
system.l2.mem_side = system.membus.slave
for i in xrange(options.num_cpus):
if options.caches:
icache = icache_class(size=options.l1i_size,
assoc=options.l1i_assoc,
block_size=options.cacheline_size)
dcache = dcache_class(size=options.l1d_size,
assoc=options.l1d_assoc,
block_size=options.cacheline_size)
if options.l3cache:
system.cpu[i].l2 = l2_cache_class(size = options.l2_size,
assoc = options.l2_assoc,
block_size =
options.cacheline_size)
system.cpu[i].tol2bus = CoherentBus()
system.cpu[i].l2.cpu_side = system.cpu[i].tol2bus.master
system.cpu[i].l2.mem_side = system.tol3bus.slave
if buildEnv['TARGET_ISA'] == 'x86':
system.cpu[i].addPrivateSplitL1Caches(icache, dcache,
PageTableWalkerCache(),
PageTableWalkerCache())
else:
system.cpu[i].addPrivateSplitL1Caches(icache, dcache)
system.cpu[i].createInterruptController()
if options.l3cache:
system.cpu[i].connectAllPorts(system.cpu[i].tol2bus,
system.membus)
else:
if options.l2cache:
system.cpu[i].connectAllPorts(system.tol2bus,
system.membus)
else:
system.cpu[i].connectAllPorts(system.membus)
--------------------------
config/common/Caches.py-----------------------------------------
class L1Cache(BaseCache):
    # Default parameters for a first-level cache. No size is set here;
    # it has to be supplied when the cache is instantiated.

    # Geometry
    block_size = 64
    assoc = 2

    # Access timing
    hit_latency = 2
    response_latency = 2

    # Outstanding-miss bookkeeping
    mshrs = 4
    tgts_per_mshr = 20

    # Flag read by BaseCache; set for the caches closest to the CPU.
    is_top_level = True
class L2Cache(BaseCache):
    # Default parameters for a second-level cache. No size is set here;
    # it has to be supplied when the cache is instantiated.

    # Geometry
    block_size = 64
    assoc = 8

    # Access timing
    hit_latency = 8
    response_latency = 8

    # Outstanding-miss and write-back bookkeeping
    mshrs = 16
    tgts_per_mshr = 16
    write_buffers = 8
class L3Cache(BaseCache):
    # Default parameters for a third-level cache. No size is set here;
    # it has to be supplied when the cache is instantiated.

    # Geometry
    block_size = 64
    assoc = 16

    # Access timing
    hit_latency = 20
    response_latency = 20

    # Outstanding-miss and write-back bookkeeping
    mshrs = 512
    tgts_per_mshr = 20
    write_buffers = 256
Are there any mistakes that I am missing?
Thanks in advance,
Hanfeng
# Copyright (c) 2012 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2010 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Lisa Hsu
# Configure the M5 cache hierarchy config in one place
#
import m5
from m5.objects import *
from Caches import *
def config_cache(options, system):
    """Create the cache hierarchy selected by ``options`` and wire it into ``system``.

    One of three layouts is built, depending on the option flags:

    * ``options.l3cache``: every CPU gets a private L2 behind its own
      ``tol2bus``; all private L2s feed ``system.tol3bus``, which leads to
      a shared ``system.l3`` attached to ``system.membus``.
    * ``options.l2cache`` (without ``l3cache``): one shared ``system.l2``
      sits between ``system.tol2bus`` and ``system.membus``.
    * neither: the CPUs are connected directly to ``system.membus``.

    Independently of the above, ``options.caches`` adds private split L1
    instruction/data caches to each CPU (plus two page-table-walker caches
    when the target ISA is x86).

    Args:
        options: Parsed options. Reads ``cpu_type``, ``caches``,
            ``l2cache``, ``l3cache``, ``clock``, ``num_cpus``,
            ``cacheline_size`` and the ``l1i_``/``l1d_``/``l2_``/``l3_``
            ``size`` and ``assoc`` values.
        system: System object to populate; ``system.membus`` and
            ``system.cpu[0 .. num_cpus-1]`` must already exist.

    Returns:
        The same ``system`` object, modified in place.

    Exits the process with status 1 if ``cpu_type`` is "arm_detailed" and
    the O3_ARM_v7a classes cannot be loaded.
    """
    if options.cpu_type == "arm_detailed":
        try:
            # Import only the names that are used; a wildcard import
            # inside a function is not valid in newer Python versions.
            from O3_ARM_v7a import (O3_ARM_v7a_DCache, O3_ARM_v7a_ICache,
                                    O3_ARM_v7aL2)
        except Exception:
            # Deliberately broad: loading the module can fail with errors
            # other than ImportError. Unlike a bare "except:", this still
            # lets KeyboardInterrupt and SystemExit propagate.
            import sys
            print("arm_detailed is unavailable. Did you compile the O3 model?")
            sys.exit(1)

        dcache_class, icache_class, l2_cache_class = \
            O3_ARM_v7a_DCache, O3_ARM_v7a_ICache, O3_ARM_v7aL2
        # Only L1 and L2 classes are taken from the ARM-specific module,
        # so use the generic L3Cache here. Previously l3_cache_class was
        # left unbound on this path and options.l3cache raised NameError.
        l3_cache_class = L3Cache
    else:
        dcache_class, icache_class, l2_cache_class, l3_cache_class = \
            L1Cache, L1Cache, L2Cache, L3Cache

    if options.l3cache:
        # Shared L3 and the bus in front of it. Both are children of the
        # system rather than of a CPU, so they are given the clock
        # explicitly. The L2-to-L3 bus width is set to 32.
        system.l3 = l3_cache_class(clock=options.clock,
                                   size=options.l3_size,
                                   assoc=options.l3_assoc,
                                   block_size=options.cacheline_size)
        system.tol3bus = CoherentBus(clock=options.clock, width=32)
        system.l3.cpu_side = system.tol3bus.master
        system.l3.mem_side = system.membus.slave
    elif options.l2cache:
        # Shared L2 and the L1-to-L2 bus, configured the same way as the
        # shared L3 above (explicit clock, bus width 32).
        system.l2 = l2_cache_class(clock=options.clock,
                                   size=options.l2_size,
                                   assoc=options.l2_assoc,
                                   block_size=options.cacheline_size)
        system.tol2bus = CoherentBus(clock=options.clock, width=32)
        system.l2.cpu_side = system.tol2bus.master
        system.l2.mem_side = system.membus.slave

    for i in range(options.num_cpus):
        cpu = system.cpu[i]

        if options.l3cache:
            # Private L2 plus its own bus, hanging off this CPU and
            # feeding the shared L3 bus. Built regardless of
            # options.caches so that the connectAllPorts() call below
            # always finds cpu.tol2bus.
            # NOTE(review): this bus uses the default CoherentBus
            # parameters, unlike the shared buses (width=32) - confirm
            # that is intended.
            cpu.l2 = l2_cache_class(size=options.l2_size,
                                    assoc=options.l2_assoc,
                                    block_size=options.cacheline_size)
            cpu.tol2bus = CoherentBus()
            cpu.l2.cpu_side = cpu.tol2bus.master
            cpu.l2.mem_side = system.tol3bus.slave

        if options.caches:
            icache = icache_class(size=options.l1i_size,
                                  assoc=options.l1i_assoc,
                                  block_size=options.cacheline_size)
            dcache = dcache_class(size=options.l1d_size,
                                  assoc=options.l1d_assoc,
                                  block_size=options.cacheline_size)

            # When connecting the caches, the clock is also inherited
            # from the CPU in question
            if buildEnv['TARGET_ISA'] == 'x86':
                cpu.addPrivateSplitL1Caches(icache, dcache,
                                            PageTableWalkerCache(),
                                            PageTableWalkerCache())
            else:
                cpu.addPrivateSplitL1Caches(icache, dcache)

        cpu.createInterruptController()

        # Attach the CPU-side ports to the first bus below the CPU.
        if options.l3cache:
            cpu.connectAllPorts(cpu.tol2bus, system.membus)
        elif options.l2cache:
            cpu.connectAllPorts(system.tol2bus, system.membus)
        else:
            cpu.connectAllPorts(system.membus)

    return system
# Copyright (c) 2012 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2006-2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Lisa Hsu
from m5.objects import *
# Base implementations of L1, L2, L3, IO and TLB-walker caches. They are
# used in the regressions and also as base components in the
# system-configuration scripts. The values are meant to serve as a
# starting point, and specific parameters can be overridden in the
# specific instantiations.
class L1Cache(BaseCache):
    # Default parameters for a first-level cache. No size is set here;
    # it has to be supplied when the cache is instantiated.

    # Geometry
    block_size = 64
    assoc = 2

    # Access timing
    hit_latency = 2
    response_latency = 2

    # Outstanding-miss bookkeeping
    mshrs = 4
    tgts_per_mshr = 20

    # Flag read by BaseCache; set for the caches closest to the CPU.
    is_top_level = True
class L2Cache(BaseCache):
    # Default parameters for a second-level cache. No size is set here;
    # it has to be supplied when the cache is instantiated.
    #
    # Alternative values that were left commented out in this file, kept
    # for reference: hit_latency = 20, response_latency = 20, mshrs = 20,
    # tgts_per_mshr = 12.

    # Geometry
    block_size = 64
    assoc = 8

    # Access timing
    hit_latency = 8
    response_latency = 8

    # Outstanding-miss and write-back bookkeeping
    mshrs = 16
    tgts_per_mshr = 16
    write_buffers = 8
class L3Cache(BaseCache):
    # Default parameters for a third-level cache. No size is set here;
    # it has to be supplied when the cache is instantiated.

    # Geometry
    block_size = 64
    assoc = 16

    # Access timing
    hit_latency = 20
    response_latency = 20

    # Outstanding-miss and write-back bookkeeping
    mshrs = 512
    tgts_per_mshr = 20
    write_buffers = 256
class IOCache(BaseCache):
    # Default parameters for the IO cache; unlike the L1/L2/L3 classes in
    # this file, a fixed size is set here.

    # Geometry
    size = '1kB'
    block_size = 64
    assoc = 8

    # Access timing
    hit_latency = 50
    response_latency = 50

    # Outstanding-miss bookkeeping
    mshrs = 20
    tgts_per_mshr = 12

    # Flags read by BaseCache.
    forward_snoops = False
    is_top_level = True
class PageTableWalkerCache(BaseCache):
    # Default parameters for a page-table-walker cache; a fixed size is
    # set here, so it can be instantiated without arguments.

    # Geometry
    size = '1kB'
    block_size = 64
    assoc = 2

    # Access timing
    hit_latency = 2
    response_latency = 2

    # Outstanding-miss bookkeeping
    mshrs = 10
    tgts_per_mshr = 12

    # Flag read by BaseCache; set for the caches closest to the CPU.
    is_top_level = True
_______________________________________________
gem5-users mailing list
[email protected]
http://m5sim.org/cgi-bin/mailman/listinfo/gem5-users