[gem5-dev] changeset in gem5: mem: Add rank-wise refresh to the DRAM contro...

2014-12-23 Thread Omar Naji via gem5-dev
changeset bb665366cc00 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=bb665366cc00
description:
mem: Add rank-wise refresh to the DRAM controller

This patch adds rank-wise refresh to the controller, as opposed to the
channel-wide refresh currently in place. In essence each rank can be
refreshed independently, and for this to be possible the controller
is extended with a state machine per rank.

Without this patch the data bus is always idle during a refresh, as
all the ranks are refreshing at the same time. With the rank-wise
refresh it is possible to use one rank while another one is
refreshing, and thus the data bus can be kept busy.

The patch introduces a Rank class to encapsulate the state per rank,
and also shifts all the relevant banks, activation tracking etc to the
rank. The arbitration is also updated to consider the state of the rank.

diffstat:

 src/mem/dram_ctrl.cc |  717 --
 src/mem/dram_ctrl.hh |  360 -
 2 files changed, 637 insertions(+), 440 deletions(-)

diffs (truncated from 1621 to 300 lines):

diff -r 471d390943f0 -r bb665366cc00 src/mem/dram_ctrl.cc
--- a/src/mem/dram_ctrl.cc  Tue Dec 23 09:31:18 2014 -0500
+++ b/src/mem/dram_ctrl.cc  Tue Dec 23 09:31:18 2014 -0500
@@ -40,6 +40,7 @@
  * Authors: Andreas Hansson
  *  Ani Udipi
  *  Neha Agarwal
+ *  Omar Naji
  */
 
 #include base/bitfield.hh
@@ -59,8 +60,7 @@
 port(name() + .port, *this),
 retryRdReq(false), retryWrReq(false),
 busState(READ),
-nextReqEvent(this), respondEvent(this), activateEvent(this),
-prechargeEvent(this), refreshEvent(this), powerEvent(this),
+nextReqEvent(this), respondEvent(this),
 drainManager(NULL),
 deviceSize(p-device_size),
 deviceBusWidth(p-device_bus_width), burstLength(p-burst_length),
@@ -89,32 +89,19 @@
 maxAccessesPerRow(p-max_accesses_per_row),
 frontendLatency(p-static_frontend_latency),
 backendLatency(p-static_backend_latency),
-busBusyUntil(0), refreshDueAt(0), refreshState(REF_IDLE),
-pwrStateTrans(PWR_IDLE), pwrState(PWR_IDLE), prevArrival(0),
-nextReqTime(0), pwrStateTick(0), numBanksActive(0),
-activeRank(0), timeStampOffset(0)
+busBusyUntil(0), prevArrival(0),
+nextReqTime(0), activeRank(0), timeStampOffset(0)
 {
-// create the bank states based on the dimensions of the ranks and
-// banks
-banks.resize(ranksPerChannel);
+for (int i = 0; i  ranksPerChannel; i++) {
+Rank* rank = new Rank(*this, p);
+ranks.push_back(rank);
 
-//create list of drampower objects. For each rank 1 drampower instance.
-for (int i = 0; i  ranksPerChannel; i++) {
-DRAMPower drampower = DRAMPower(p, false);
-rankPower.emplace_back(drampower);
-}
+rank-actTicks.resize(activationLimit, 0);
+rank-banks.resize(banksPerRank);
+rank-rank = i;
 
-actTicks.resize(ranksPerChannel);
-for (size_t c = 0; c  ranksPerChannel; ++c) {
-banks[c].resize(banksPerRank);
-actTicks[c].resize(activationLimit, 0);
-}
-
-// set the bank indices
-for (int r = 0; r  ranksPerChannel; r++) {
 for (int b = 0; b  banksPerRank; b++) {
-banks[r][b].rank = r;
-banks[r][b].bank = b;
+rank-banks[b].bank = b;
 // GDDR addressing of banks to BG is linear.
 // Here we assume that all DRAM generations address bank groups as
 // follows:
@@ -126,10 +113,10 @@
 //banks 1,5,9,13  are in bank group 1
 //banks 2,6,10,14 are in bank group 2
 //banks 3,7,11,15 are in bank group 3
-banks[r][b].bankgr = b % bankGroupsPerRank;
+rank-banks[b].bankgr = b % bankGroupsPerRank;
 } else {
 // No bank groups; simply assign to bank number
-banks[r][b].bankgr = b;
+rank-banks[b].bankgr = b;
 }
 }
 }
@@ -254,19 +241,18 @@
 {
 // timestamp offset should be in clock cycles for DRAMPower
 timeStampOffset = divCeil(curTick(), tCK);
+
 // update the start tick for the precharge accounting to the
 // current tick
-pwrStateTick = curTick();
+for (auto r : ranks) {
+r-startup(curTick() + tREFI - tRP);
+}
 
 // shift the bus busy time sufficiently far ahead that we never
 // have to worry about negative values when computing the time for
 // the next request, this will add an insignificant bubble at the
 // start of simulation
 busBusyUntil = curTick() + tRP + tRCD + tCL;
-
-// kick off the refresh, and give ourselves enough time to
-// precharge
-schedule(refreshEvent, curTick() + tREFI - tRP);
 }
 
 Tick
@@ -411,7 +397,7 @@
 // later
 uint16_t 

[gem5-dev] changeset in gem5: mem: Fix a bug in the DRAM controller arbitra...

2014-12-23 Thread Omar Naji via gem5-dev
changeset 471d390943f0 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=471d390943f0
description:
mem: Fix a bug in the DRAM controller arbitration

Fix a minor issue that affects multi-rank systems.

diffstat:

 src/mem/dram_ctrl.cc |  12 +---
 1 files changed, 9 insertions(+), 3 deletions(-)

diffs (29 lines):

diff -r 6d4da9dc90a1 -r 471d390943f0 src/mem/dram_ctrl.cc
--- a/src/mem/dram_ctrl.cc  Tue Dec 23 09:31:18 2014 -0500
+++ b/src/mem/dram_ctrl.cc  Tue Dec 23 09:31:18 2014 -0500
@@ -1477,7 +1477,13 @@
 // Offset by tRCD to correlate with ACT timing variables
 Tick min_cmd_at = busBusyUntil - tCL - tRCD;
 
-// Prioritize same rank accesses that can issue B2B
+// if we have multiple ranks and all
+// waiting packets are accessing a rank which was previously active
+// then bank_mask_same_rank will be set to a value while bank_mask will
+// remain 0. In this case, the function should return the value of
+// bank_mask_same_rank.
+// else if waiting packets access a rank which was previously active and
+// other ranks, prioritize same rank accesses that can issue B2B
 // Only optimize for same ranks when the command type
 // does not change; do not want to unnecessarily incur tWTR
 //
@@ -1485,8 +1491,8 @@
 // 1) Commands that access the same rank as previous burst
 //and can prep the bank seamlessly.
 // 2) Commands (any rank) with earliest bank prep
-if (!switched_cmd_type  same_rank_match 
-min_act_at_same_rank = min_cmd_at) {
+if ((bank_mask == 0) || (!switched_cmd_type  same_rank_match 
+min_act_at_same_rank = min_cmd_at)) {
 bank_mask = bank_mask_same_rank;
 }
 
___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev


[gem5-dev] changeset in gem5: mem: Add a GDDR5 DRAM config

2014-12-02 Thread Omar Naji via gem5-dev
changeset e1a853349529 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=e1a853349529
description:
mem: Add a GDDR5 DRAM config

This patch adds a first cut GDDR5 config to accommodate the users
combining gem5 and GPUSim. The config is based on a SK Hynix
datasheet, and the Nvidia GTX580 specification. Someone from the
GPUSim user-camp should tweak the default page-policy and static
frontend and backend latencies.

diffstat:

 src/mem/DRAMCtrl.py  |  85 
 src/mem/dram_ctrl.cc |   6 +++-
 src/mem/drampower.cc |   5 +-
 3 files changed, 93 insertions(+), 3 deletions(-)

diffs (129 lines):

diff -r dd04eb06ad42 -r e1a853349529 src/mem/DRAMCtrl.py
--- a/src/mem/DRAMCtrl.py   Mon Nov 24 09:03:39 2014 -0500
+++ b/src/mem/DRAMCtrl.py   Tue Dec 02 06:07:32 2014 -0500
@@ -736,3 +736,88 @@
 IDD52 = '150mA'
 VDD = '1.8V'
 VDD2 = '1.2V'
+
+# A single GDDR5 x64 interface, with
+# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
+# H5GQ1H24AFR) in a 2x32 configuration.
+class GDDR5_4000_x64(DRAMCtrl):
+# size of device
+device_size = '128MB'
+
+# 2x32 configuration, 1 device with a 32-bit interface
+device_bus_width = 32
+
+# GDDR5 is a BL8 device
+burst_length = 8
+
+# Each device has a page (row buffer) size of 2Kbits (256Bytes)
+device_rowbuffer_size = '256B'
+
+# 2x32 configuration, so 2 devices
+devices_per_rank = 2
+
+# assume single rank
+ranks_per_channel = 1
+
+# GDDR5 has 4 bank groups
+bank_groups_per_rank = 4
+
+# GDDR5 has 16 banks with 4 bank groups
+banks_per_rank = 16
+
+# 1000 MHz
+tCK = '1ns'
+
+# 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
+# Data bus runs @2000 Mhz = DDR ( data runs at 4000 MHz )
+# 8 beats at 4000 MHz = 2 beats at 1000 MHz
+# tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+# With bank group architectures, tBURST represents the CAS-to-CAS
+# delay for bursts to different bank groups (tCCD_S)
+tBURST = '2ns'
+
+# @1000MHz data rate, tCCD_L is 3 CK
+# CAS-to-CAS delay for bursts to the same bank group
+# tBURST is equivalent to tCCD_S; no explicit parameter required
+# for CAS-to-CAS delay for bursts to different bank groups
+tCCD_L = '3ns';
+
+tRCD = '12ns'
+
+# tCL is not directly found in datasheet and assumed equal tRCD
+tCL = '12ns'
+
+tRP = '12ns'
+tRAS = '28ns'
+
+# RRD_S (different bank group)
+# RRD_S is 5.5 ns in datasheet.
+# rounded to the next multiple of tCK
+tRRD = '6ns'
+
+# RRD_L (same bank group)
+# RRD_L is 5.5 ns in datasheet.
+# rounded to the next multiple of tCK
+tRRD_L = '6ns'
+
+tXAW = '23ns'
+
+# tXAW < 4 x tRRD.
+# Therefore, activation limit is set to 0
+activation_limit = 0
+
+tRFC = '65ns'
+tWR = '12ns'
+
+# Here using the average of WTR_S and WTR_L
+tWTR = '5ns'
+
+# Read-to-Precharge 2 CK
+tRTP = '2ns'
+
+# Assume 2 cycles
+tRTW = '2ns'
+
+# Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
+tCS = '2ns'
+tREFI = '3.9us'
diff -r dd04eb06ad42 -r e1a853349529 src/mem/dram_ctrl.cc
--- a/src/mem/dram_ctrl.cc  Mon Nov 24 09:03:39 2014 -0500
+++ b/src/mem/dram_ctrl.cc  Tue Dec 02 06:07:32 2014 -0500
@@ -115,6 +115,9 @@
 for (int b = 0; b  banksPerRank; b++) {
 banks[r][b].rank = r;
 banks[r][b].bank = b;
+// GDDR addressing of banks to BG is linear.
+// Here we assume that all DRAM generations address bank groups as
+// follows:
 if (bankGroupArch) {
 // Simply assign lower bits to bank group in order to
 // rotate across bank groups as banks are incremented
@@ -224,7 +227,8 @@
   tCCD_L, tBURST, bankGroupsPerRank);
 }
 // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay
-if (tRRD_L = tRRD) {
+// some datasheets might specify it equal to tRRD
+if (tRRD_L  tRRD) {
 fatal(tRRD_L (%d) should be larger than tRRD (%d) when 
   bank groups per rank (%d) is greater than 1\n,
   tRRD_L, tRRD, bankGroupsPerRank);
diff -r dd04eb06ad42 -r e1a853349529 src/mem/drampower.cc
--- a/src/mem/drampower.cc  Mon Nov 24 09:03:39 2014 -0500
+++ b/src/mem/drampower.cc  Tue Dec 02 06:07:32 2014 -0500
@@ -155,7 +155,8 @@
 {
 uint32_t burst_cycles = divCeil(p-tBURST, p-tCK);
 uint8_t data_rate = p-burst_length / burst_cycles;
-if (data_rate != 1  data_rate != 2)
-fatal(Got unexpected data rate %d, should be 1 or 2\n);
+// 4 for GDDR5
+if (data_rate != 1  data_rate != 2  data_rate != 4)
+fatal(Got unexpected data rate %d, should be 1 or 2 or 4\n);
 return data_rate;
 }

[gem5-dev] changeset in gem5: mem: Add missing timing and current parameters...

2014-10-10 Thread Omar Naji via gem5-dev
changeset f958ccec628f in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=f958ccec628f
description:
mem: Add missing timing and current parameters to DRAM configs

This patch adds missing timing and current parameters to the existing
DRAM configs. These missing timing and current parameters are required
by DRAMPower for the DRAM power calculations. The missing values are
datasheet values of the specified DRAMs, and the appropriate
references are added for the various configs.

diffstat:

 src/mem/DRAMCtrl.py |  209 +++
 1 files changed, 192 insertions(+), 17 deletions(-)

diffs (truncated from 322 to 300 lines):

diff -r 025a459edb87 -r f958ccec628f src/mem/DRAMCtrl.py
--- a/src/mem/DRAMCtrl.py   Thu Oct 09 17:52:04 2014 -0400
+++ b/src/mem/DRAMCtrl.py   Fri Jul 25 10:05:59 2014 +0100
@@ -122,6 +122,15 @@
 # to be instantiated for a multi-channel configuration
 channels = Param.Unsigned(1, Number of channels)
 
+# For power modelling we need to know if the DRAM has a DLL or not
+dll = Param.Bool(True, DRAM has DLL or not)
+
+# DRAMPower provides in addition to the core power, the possibility to
+# include RD/WR termination and IO power. This calculation assumes some
+# default values. The integration of DRAMPower with gem5 does not include
+# IO and RD/WR termination power by default. This might be added as an
+# additional feature in the future.
+
 # timing behaviour and constraints - all in nanoseconds
 
 # the base clock period of the DRAM
@@ -193,14 +202,108 @@
 tXAW = Param.Latency(X activation window)
 activation_limit = Param.Unsigned(Max number of activates in window)
 
+# time to exit power-down mode
+# Exit power-down to next valid command delay
+tXP = Param.Latency(0ns, Power-up Delay)
+
+# Exit Powerdown to commands requiring a locked DLL
+tXPDLL = Param.Latency(0ns, Power-up Delay with locked DLL)
+
+# time to exit self-refresh mode
+tXS = Param.Latency(0ns, Self-refresh exit latency)
+
+# time to exit self-refresh mode with locked DLL
+tXSDLL = Param.Latency(0ns, Self-refresh exit latency DLL)
+
 # Currently rolled into other params
 ##
 
 # tRC  - assumed to be tRAS + tRP
 
+# Power Behaviour and Constraints
+# DRAMs like LPDDR and WideIO have 2 external voltage domains. These are
+# defined as VDD and VDD2. Each current is defined for each voltage domain
+# separately. For example, current IDD0 is active-precharge current for
+# voltage domain VDD and current IDD02 is active-precharge current for
+# voltage domain VDD2.
+# By default all currents are set to 0mA. Users who are only interested in
+# the performance of DRAMs can leave them at 0.
+
+# Operating 1 Bank Active-Precharge current
+IDD0 = Param.Current(0mA, Active precharge current)
+
+# Operating 1 Bank Active-Precharge current multiple voltage Range
+IDD02 = Param.Current(0mA, Active precharge current VDD2)
+
+# Precharge Power-down Current: Slow exit
+IDD2P0 = Param.Current(0mA, Precharge Powerdown slow)
+
+# Precharge Power-down Current: Slow exit multiple voltage Range
+IDD2P02 = Param.Current(0mA, Precharge Powerdown slow VDD2)
+
+# Precharge Power-down Current: Fast exit
+IDD2P1 = Param.Current(0mA, Precharge Powerdown fast)
+
+# Precharge Power-down Current: Fast exit multiple voltage Range
+IDD2P12 = Param.Current(0mA, Precharge Powerdown fast VDD2)
+
+# Precharge Standby current
+IDD2N = Param.Current(0mA, Precharge Standby current)
+
+# Precharge Standby current multiple voltage range
+IDD2N2 = Param.Current(0mA, Precharge Standby current VDD2)
+
+# Active Power-down current: slow exit
+IDD3P0 = Param.Current(0mA, Active Powerdown slow)
+
+# Active Power-down current: slow exit multiple voltage range
+IDD3P02 = Param.Current(0mA, Active Powerdown slow VDD2)
+
+# Active Power-down current : fast exit
+IDD3P1 = Param.Current(0mA, Active Powerdown fast)
+
+# Active Power-down current : fast exit multiple voltage range
+IDD3P12 = Param.Current(0mA, Active Powerdown fast VDD2)
+
+# Active Standby current
+IDD3N = Param.Current(0mA, Active Standby current)
+
+# Active Standby current multiple voltage range
+IDD3N2 = Param.Current(0mA, Active Standby current VDD2)
+
+# Burst Read Operating Current
+IDD4R = Param.Current(0mA, READ current)
+
+# Burst Read Operating Current multiple voltage range
+IDD4R2 = Param.Current(0mA, READ current VDD2)
+
+# Burst Write Operating Current
+IDD4W = Param.Current(0mA, WRITE current)
+
+# Burst Write Operating Current multiple voltage range
+IDD4W2 = Param.Current(0mA, WRITE current VDD2)
+
+# Refresh Current
+IDD5 = 

[gem5-dev] changeset in gem5: mem: Remove DRAMSim2 DDR3 configuration

2014-10-10 Thread Omar Naji via gem5-dev
changeset 025a459edb87 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=025a459edb87
description:
mem: Remove DRAMSim2 DDR3 configuration

This patch prunes the DDR3 config that was initially created to match
the default config of DRAMSim2. The config is not complete as it is,
and to avoid having to maintain it, the easiest way forward is to
simply prune it. Going forward we are adding power number etc to the
other configurations.

diffstat:

 src/mem/DRAMCtrl.py |  63 -
 1 files changed, 0 insertions(+), 63 deletions(-)

diffs (73 lines):

diff -r 0caf62b57dfd -r 025a459edb87 src/mem/DRAMCtrl.py
--- a/src/mem/DRAMCtrl.py   Thu Oct 09 17:52:03 2014 -0400
+++ b/src/mem/DRAMCtrl.py   Thu Oct 09 17:52:04 2014 -0400
@@ -348,69 +348,6 @@
 # <=85C, half for >85C
 tREFI = '7.8us'
 
-# A single DDR3 x64 interface (one command and address bus), with
-# default timings based on DDR3-1333 4 Gbit parts in an 8x8
-# configuration, which would amount to 4 GByte of memory.  This
-# configuration is primarily for comparing with DRAMSim2, and all the
-# parameters except ranks_per_channel are based on the DRAMSim2 config
-# file DDR3_micron_32M_8B_x8_sg15.ini. Note that ranks_per_channel has
-# to be manually set, depending on size of the memory to be
-# simulated. By default DRAMSim2 has 2048MB of memory with a single
-# rank. Therefore for 4 GByte memory, set ranks_per_channel = 2
-class DDR3_1333_x64_DRAMSim2(DRAMCtrl):
-# 8x8 configuration, 8 devices each with an 8-bit interface
-device_bus_width = 8
-
-# DDR3 is a BL8 device
-burst_length = 8
-
-# Each device has a page (row buffer) size of 1KB
-# (this depends on the memory density)
-device_rowbuffer_size = '1kB'
-
-# 8x8 configuration, so 8 devices
-devices_per_rank = 8
-
-# Use two ranks
-ranks_per_channel = 2
-
-# DDR3 has 8 banks in all configurations
-banks_per_rank = 8
-
-# 666 MHs
-tCK = '1.5ns'
-
-tRCD = '15ns'
-tCL = '15ns'
-tRP = '15ns'
-tRAS = '36ns'
-tWR = '15ns'
-tRTP = '7.5ns'
-
-# 8 beats across an x64 interface translates to 4 clocks @ 666.66 MHz.
-# Note this is a BL8 DDR device.
-tBURST = '6ns'
-
-tRFC = '160ns'
-
-# DDR3, <=85C, half for >85C
-tREFI = '7.8us'
-
-# Greater of 4 CK or 7.5 ns, 4 CK @ 666.66 MHz = 6 ns
-tWTR = '7.5ns'
-
-# Default same rank rd-to-wr bus turnaround to 2 CK, @666.66 MHz = 3 ns
-tRTW = '3ns'
-
-# Default different rank bus delay to 2 CK, @666.66 MHz = 3 ns
-tCS = '3ns'
-
-tRRD = '6.0ns'
-
-tXAW = '30ns'
-activation_limit = 4
-
-
 # A single LPDDR2-S4 x32 interface (one command/address bus), with
 # default timings based on a LPDDR2-1066 4 Gbit part in a 1x32
 # configuration.
___
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev