On 7/22/06, Petter Urkedal <[EMAIL PROTECTED]> wrote:
Timothy Miller wrote:
> There's lots of stuff you can do earlier in the pipeline, such as
> detecting row misses and such.  One approach I've considered (which
> suffers from the evil two-level state machine problem) is something
> like what I describe below.  Some of the pipelining might seem
> excessive, but the idea is to get it to run at 200MHz.
>
What is the evil two-level state machine problem?

Well, perhaps it's a symptom of the way I've done it before, but
here's the problem.  You have state machine A feeding input to state
machine B.  B has a busy signal out to A, because it may execute more
than one cycle for any given command from A.  In order to register the
busy signal, B has to be able to register a command from A.  This lets
A get one step ahead.  The result is that A's behavior is spread out
in time and is ahead of B on a timing diagram.  It's a mess and
impossible to debug.  If you could get it to work right the first
time, it would be great, but if not, you're in for a lot of work
staring at simulation waveforms, where the causality is very hard to
see.

I made a sketch inspired by this. Your mention of 1-hot encoding (which
I had to look up) gave me the idea that this can be used for counters,
but with a simple mask of 1 bits for each occupied cycle.

*smacks forehead*  Why didn't I think of that!

Since you've brought up the idea, I can see how to really leverage
this.  For the DRAM controller, the number of wait cycles really isn't
all that many, so it doesn't hurt to use shift registers for counting.
There are only a handful, and the counts are generally less than 8,
with a few being longer, but not by much.

Doing it this way, it might be easy to get it to go at 200MHz, and it
would certainly kick the ass of the BRAM-based version I've been
toying with.

BTW, I see two good ways of doing the counters.  First of all, in a
one-hot counter, when the count is n, bit n is set.
That is, the register holds (1<<n).

There's this way:

reg [15:0] counter;
always @(posedge clock) begin


A nice
side-effect is that a max-function which is needed at one point, is a
simple bitwise or. The sketch is still not programmable, but instead
executes directly at stage 4. It would be good if you or someone else
could share an opinion on whether this could be warped to 200 MHz, before I
spend more time on it.



// ddr_ctl -- sketch of a pipelined DDR SDRAM command scheduler.
//
// A request word `req` (command, tag, bank/row/column address, write data and
// byte mask) is accepted whenever `busy` is low and moves through stages 0..3,
// which track the open row per bank and decide whether PRECHARGE and/or ACTIVE
// must precede the READ/WRITE.  Stage 4 issues the DDR commands, gated by
// shift-register timing masks: a mask holds one 1 bit per cycle that must still
// elapse, is shifted right every clock, and bit 0 == 0 means "allowed now".
// The maximum of two constraints is therefore a bitwise OR of their masks.
//
// Handshake: the controller alternates between a "load" cycle (busy == 0, all
// stages advance by one) and one or more "execute" cycles (busy == 1, stage 4
// works on the request held in stage 3).  busy drops again once the READ or
// WRITE has been issued, or immediately when stage 3 holds nothing to issue.
//
// reset_ is treated as a synchronous, active-low reset (assumed from the
// trailing underscore, which this file also uses for the inverted clock
// ddr_clk_ -- TODO confirm with the integrator).
//
// NOTE(review): known gaps that are NOT addressed here because they need
// interface or design decisions:
//   * ddr_addr_width (12) is narrower than row_width (13), so the top row bit
//     is truncated when a row address is driven on ddr_addr.
//   * CMD_REFRESH only marks all banks closed; no precharge-all / auto-refresh
//     command is sent to the DRAM.
//   * ddr_clk, ddr_clk_, ddr_cke and ddr_dqs are declared but never driven,
//     and read data arriving on ddr_dq_io is not captured or forwarded.
//   * The tmask_* timing configuration registers are never loaded.
module ddr_ctl(clock, reset_, req, busy, rd_avail, rd_tag,
               ddr_clk, ddr_clk_, ddr_cke, ddr_bank, ddr_addr,
               ddr_cmd, ddr_dm, ddr_dq_io, ddr_dqs);
    parameter chip_count = 2;

    // Bus Widths
    parameter tag_width = 4;
    parameter cmd_width = 2;
    parameter addr_width = 24;
    parameter   col_width = 9;
    parameter   row_width = 13;
    parameter   bank_width = 2;
    parameter data_width = 16*chip_count;
    parameter mask_width = data_width/8;
    parameter bank_count = 1 << bank_width;

    // Commands (any other encoding, e.g. 2'b00, is treated as "no request")
    parameter CMD_REFRESH = 2'b11;
    parameter CMD_READ = 2'b01;
    parameter CMD_WRITE = 2'b10;

    // The Request Format, LSB first: mask, data, column, row, bank, cmd, tag
    parameter req_width = cmd_width + tag_width + addr_width
                        + data_width + mask_width;
    parameter req_mask_offset = 0;
    parameter req_data_offset = req_mask_offset + mask_width;
    parameter req_addr_offset = req_data_offset + data_width;
    parameter   req_col_offset = req_addr_offset;
    parameter   req_row_offset = req_col_offset + col_width;
    parameter   req_bank_offset = req_row_offset + row_width;
    parameter req_cmd_offset = req_addr_offset + addr_width;
    parameter req_tag_offset = req_cmd_offset + cmd_width;

    // Client Connectors
    input clock;
    input reset_;
    input[req_width-1:0] req;
    output busy;
    output rd_avail;
    output[tag_width-1:0] rd_tag;

    // DDR Connectors
    // Some of the ports connect to both memories; for others the lines are
    // split between them.
    parameter ddr_addr_width = 12;
    parameter DDR_CMD_NOOP      = 3'b111;
    parameter DDR_CMD_READ      = 3'b101;
    parameter DDR_CMD_WRITE     = 3'b100;
    parameter DDR_CMD_PRECHARGE = 3'b010;
    parameter DDR_CMD_ACTIVE    = 3'b011;
    output ddr_clk, ddr_clk_, ddr_cke;
    output[bank_width-1:0] ddr_bank;
    output[ddr_addr_width-1:0] ddr_addr;
    output[2:0] ddr_cmd; // = {ddr_ras_,ddr_cas_,ddr_we_}
    output[data_width/8-1:0] ddr_dm;
    inout[data_width-1:0] ddr_dq_io;
    inout[chip_count-1:0] ddr_dqs;

    // Maximum Configurable Timing Values (mask lengths in clock cycles)
    parameter t_poa_max = 90;
    parameter t_read_max = 30;
    parameter t_write_max = 30;
    parameter t_rw_max = t_read_max > t_write_max? t_read_max : t_write_max;

    // Registered outputs
    reg busy;
    reg[2:0] ddr_cmd;
    reg[bank_width-1:0] ddr_bank;
    reg[ddr_addr_width-1:0] ddr_addr;

// Functions to De-Compose a Request
// Each returns one field of a packed request word `r`.
function[cmd_width-1:0] req_cmd;
        input[req_width-1:0] r;
    req_cmd = r[req_cmd_offset + cmd_width - 1 : req_cmd_offset];
endfunction
function[tag_width-1:0] req_tag;
        input[req_width-1:0] r;
    req_tag = r[req_tag_offset + tag_width - 1 : req_tag_offset];
endfunction
function[bank_width-1:0] req_bank;
        input[req_width-1:0] r;
    req_bank = r[req_bank_offset + bank_width - 1 : req_bank_offset];
endfunction
function[row_width-1:0] req_row;
        input[req_width-1:0] r;
    req_row = r[req_row_offset + row_width - 1 : req_row_offset];
endfunction
function[col_width-1:0] req_col;
        input[req_width-1:0] r;
    req_col = r[req_col_offset + col_width - 1 : req_col_offset];
endfunction
function[mask_width-1:0] req_mask;
        input[req_width-1:0] r;
    req_mask = r[req_mask_offset + mask_width - 1 : req_mask_offset];
endfunction
function[data_width-1:0] req_data;
        input[req_width-1:0] r;
    req_data = r[req_data_offset + data_width - 1 : req_data_offset];
endfunction


// Interface to DDR and dealing with DDR, CAS latency and tDQSS.
//
// Stage 4 pulses ddr_ioq_cmd for one cycle together with the READ/WRITE
// command.  The entry is inserted T_CAS (read) or T_DQSS (write) slots away
// from the head of a delay line that advances one slot per clock; slot 0 is
// what currently drives the data pins and the rd_avail/rd_tag outputs.
parameter DDR_IOQ_CMD_READ = 2'b01;
parameter DDR_IOQ_CMD_WRITE = 2'b10;
reg[1:0]            ddr_ioq_cmd;
reg[mask_width-1:0] ddr_ioq_mask;
reg[data_width-1:0] ddr_ioq_data; // data for write, or tag for read

parameter T_CAS = 3;  // TODO make configurable
parameter T_DQSS = 1;
parameter DDR_IOQ_LENGTH = T_CAS > T_DQSS? T_CAS : T_DQSS;
// Entry layout: {write flag, read flag, mask, data}
parameter DDR_IOQ_CMDBIT_READ = data_width+mask_width;
parameter DDR_IOQ_CMDBIT_WRITE = data_width+mask_width+1;
reg[data_width+mask_width+1:0] ddr_ioq_arr[0:DDR_IOQ_LENGTH];
// The head of the queue was previously never driven, leaving every output
// derived from it floating.
wire[data_width+mask_width+1:0] ddr_ioq_current = ddr_ioq_arr[0];

integer ioq_i; // private to this block; loop variables are not shared
always @(posedge clock) begin
    if (!reset_) begin
        for (ioq_i = 0; ioq_i <= DDR_IOQ_LENGTH; ioq_i = ioq_i + 1)
            ddr_ioq_arr[ioq_i] <= 0;
    end else begin
        // Advance every slot, including the last one (index DDR_IOQ_LENGTH),
        // which the previous "<" bound left stuck in place.
        for (ioq_i = 1; ioq_i <= DDR_IOQ_LENGTH; ioq_i = ioq_i + 1)
            ddr_ioq_arr[ioq_i - 1] <= ddr_ioq_arr[ioq_i];
        // The tail becomes empty unless an insertion below overrides it.
        ddr_ioq_arr[DDR_IOQ_LENGTH] <= 0;
        // Later non-blocking assignments win, so an insertion replaces the
        // shifted value in its slot.  The read<->write timing masks are
        // expected to keep a write from landing on an in-flight read entry.
        if (ddr_ioq_cmd[0]) // read
            ddr_ioq_arr[T_CAS] <= {ddr_ioq_cmd,ddr_ioq_mask,ddr_ioq_data};
        if (ddr_ioq_cmd[1]) // write
            ddr_ioq_arr[T_DQSS] <= {ddr_ioq_cmd,ddr_ioq_mask,ddr_ioq_data};
    end
end

// Drive the data bus only while a write entry is at the head of the queue.
assign ddr_dq_io = ddr_ioq_current[DDR_IOQ_CMDBIT_WRITE]
                   ? ddr_ioq_current[data_width-1:0] : {data_width{1'bz}};
assign ddr_dm = ddr_ioq_current[data_width+mask_width-1:data_width];
assign rd_avail = ddr_ioq_current[DDR_IOQ_CMDBIT_READ];
assign rd_tag = ddr_ioq_current[tag_width-1:0];


// Stage 0: capture the client request.
reg[req_width-1:0] s0_req;
always @(posedge clock) begin
    if (!reset_) begin
        s0_req <= 0; // command 2'b00 == no request
    end else if (!busy) begin
        s0_req <= req;
    end
end

// Stage 1: look up the row last requested on this bank and record the new one.
reg[req_width-1:0] s1_req;
reg[row_width-1:0] s1_open_row;
reg[row_width-1:0] s1_open_row_arr[0:bank_count-1];
always @(posedge clock) begin
    if (!reset_) begin
        s1_req <= 0;
    end else if (!busy) begin
        // Take the request from stage 0 (it used to be assigned to itself)
        // and derive the lookup from that same request so they stay aligned.
        s1_req <= s0_req;
        s1_open_row <= s1_open_row_arr[req_bank(s0_req)];
        // Only real accesses open a row; idle/refresh words must not
        // disturb the bookkeeping.
        if (req_cmd(s0_req) == CMD_READ || req_cmd(s0_req) == CMD_WRITE)
            s1_open_row_arr[req_bank(s0_req)] <= req_row(s0_req);
    end
end

// Stage 2: compare the requested row with the recorded one.  This is only a
// "maybe" hit because stage 3 still has to confirm that the bank is open.
reg[req_width-1:0] s2_req;
reg s2_maybe_row_hit;
always @(posedge clock) begin
    if (!reset_) begin
        s2_req <= 0;
        s2_maybe_row_hit <= 0;
    end else if (!busy) begin
        s2_req <= s1_req;
        s2_maybe_row_hit <= req_row(s1_req) == s1_open_row;
    end
end

// Stage 3 state: the request being executed and what is still left to do.
reg[req_width-1:0] s3_req;
reg s3_row_is_open_arr[0:bank_count-1];
reg s3_row_hit; // NOTE(review): declared in the original sketch but unused
reg s3_need_precharge;
reg s3_need_activate;
reg s3_is_read;
reg s3_is_write;

// Stage 4 state: remaining-wait masks (bit 0 set == must still wait).
reg[t_poa_max-1:0] s4_tmask_poa;     // gates PRECHARGE and ACTIVE
reg[t_read_max-1:0] s4_tmask_read;   // gates READ
reg[t_write_max-1:0] s4_tmask_write; // gates WRITE
reg[req_width-1:0] s4_req; // NOTE(review): declared but unused

// Timing configuration (TODO: add interface to change settings)
reg[t_poa_max-1:0]   tmask_precharge_to_activate;
reg[t_poa_max-1:0]   tmask_activate_to_precharge;
reg[t_poa_max-1:0]   tmask_read_to_precharge;
reg[t_poa_max-1:0]   tmask_write_to_precharge;
reg[t_rw_max-1:0]    tmask_activate_to_rw;
reg[t_write_max-1:0] tmask_read_to_write;
reg[t_read_max-1:0]  tmask_write_to_read;

// Stages 3 and 4 share one always block so that every register (notably
// s3_need_precharge / s3_need_activate, which both stages update) has exactly
// one driver.
integer bank_i;
always @(posedge clock) begin
    // Defaults; later assignments in this block override them.
    s4_tmask_poa   <= s4_tmask_poa   >> 1;
    s4_tmask_read  <= s4_tmask_read  >> 1;
    s4_tmask_write <= s4_tmask_write >> 1;
    ddr_cmd     <= DDR_CMD_NOOP;
    ddr_ioq_cmd <= 2'b00; // one-cycle pulse, otherwise entries repeat forever

    if (!reset_) begin
        busy <= 0;
        s3_req <= 0;
        s3_need_precharge <= 0;
        s3_need_activate <= 0;
        s3_is_read <= 0;
        s3_is_write <= 0;
        for (bank_i = 0; bank_i < bank_count; bank_i = bank_i + 1)
            s3_row_is_open_arr[bank_i] <= 0;
        s4_tmask_poa   <= 0;
        s4_tmask_read  <= 0;
        s4_tmask_write <= 0;
    end else if (!busy) begin
        // ---- Stage 3: load the next request and plan its commands ----
        s3_req <= s2_req;
        s3_need_precharge <= 0;
        s3_need_activate <= 0;
        s3_is_read  <= (req_cmd(s2_req) == CMD_READ);
        s3_is_write <= (req_cmd(s2_req) == CMD_WRITE);
        case (req_cmd(s2_req))
            CMD_REFRESH: begin
                for (bank_i = 0; bank_i < bank_count; bank_i = bank_i + 1)
                    s3_row_is_open_arr[bank_i] <= 0;
            end
            CMD_READ, CMD_WRITE: begin
                s3_row_is_open_arr[req_bank(s2_req)] <= 1;
                if (!s3_row_is_open_arr[req_bank(s2_req)]) begin
                    // Bank closed: open the row.
                    s3_need_activate <= 1;
                end else if (!s2_maybe_row_hit) begin
                    // Different row open: close it, then open ours.
                    s3_need_precharge <= 1;
                    s3_need_activate <= 1;
                end
            end
            default: ; // no request
        endcase
        // Hold the pipeline while stage 4 works on this request.
        busy <= 1;
    end else begin
        // ---- Stage 4: issue at most one command, in protocol order ----
        if (s3_need_precharge) begin
            if (!s4_tmask_poa[0]) begin
                ddr_cmd <= DDR_CMD_PRECHARGE;
                ddr_bank <= req_bank(s3_req);

                s3_need_precharge <= 0;
                s4_tmask_poa <= tmask_precharge_to_activate;
            end
        end else if (s3_need_activate) begin
            if (!s4_tmask_poa[0]) begin
                ddr_cmd <= DDR_CMD_ACTIVE;
                ddr_bank <= req_bank(s3_req);
                // NOTE(review): truncates when row_width > ddr_addr_width.
                ddr_addr <= req_row(s3_req);

                s3_need_activate <= 0;
                s4_tmask_poa <= tmask_activate_to_precharge;
                // OR (== max) so a longer pending read/write constraint is
                // not shortened by the activate-to-rw delay.
                s4_tmask_write <= (s4_tmask_write >> 1) | tmask_activate_to_rw;
                s4_tmask_read  <= (s4_tmask_read >> 1)  | tmask_activate_to_rw;
            end
        end else if (s3_is_read) begin
            if (!s4_tmask_read[0]) begin
                ddr_cmd <= DDR_CMD_READ;
                ddr_bank <= req_bank(s3_req);
                ddr_addr <= req_col(s3_req);
                ddr_ioq_cmd <= DDR_IOQ_CMD_READ;
                ddr_ioq_data <= req_tag(s3_req); // tag rides in the data field

                s3_is_read <= 0;
                busy <= 0;
                s4_tmask_poa   <= (s4_tmask_poa >> 1)   | tmask_read_to_precharge;
                s4_tmask_write <= (s4_tmask_write >> 1) | tmask_read_to_write;
            end
        end else if (s3_is_write) begin
            if (!s4_tmask_write[0]) begin
                ddr_cmd <= DDR_CMD_WRITE;
                ddr_bank <= req_bank(s3_req);
                ddr_addr <= req_col(s3_req);
                ddr_ioq_cmd <= DDR_IOQ_CMD_WRITE;
                ddr_ioq_mask <= req_mask(s3_req);
                ddr_ioq_data <= req_data(s3_req);

                s3_is_write <= 0;
                busy <= 0;
                s4_tmask_poa  <= (s4_tmask_poa >> 1)  | tmask_write_to_precharge;
                s4_tmask_read <= (s4_tmask_read >> 1) | tmask_write_to_read;
            end
        end else begin
            // Nothing to issue (idle or refresh word): accept the next
            // request instead of stalling forever.
            busy <= 0;
        end
    end
end

endmodule



_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)

Reply via email to