On 2007-03-20, Timothy Normand Miller wrote:
> We might have to cheat a little now and again and have a small amount
> of muxing out the output of a stage.  We'll have to try it and see
> what the impact is.  Or we could lengthen the pipeline by a stage....
> or other things.
> 
> Sometimes, it's how you define the semantics.  The output of MEM is
> either what we get from a RAM block, or something we get from I/O, or
> something forwarded from an earlier stage.  That means we have to mux
> at least three different things AFTER the clock edge.  Either we put
> the MUX in MEM (and call it breaking the rules), or we pipe them all
> into WB and let it do the MUXing.  The only time it isn't goofy is
> when we add an extra pipeline stage (which we may need to do anyhow!).

It's good that we are pragmatic about the convention.  Taking it to the
limit was nevertheless a nice pedagogical exercise.

> >Comparing to your new fetch stage, I see I have not made the PC
> >available as a return address.  An alternative to yours would be to make
> >it readable as a register so that it would be copied before the jump;
> >you probably thought of that, and I guess it depends on whether we need
> >more than one level of calls.
> 
> I think it would be good for this to be general-purpose enough that we
> would have multiple levels of calls.

My current solution is now to drop the current PC into the output
register of branch instructions.

                                 - - -

I attach a new unfinished version in case someone is watching.  The
major change, in addition to saving the branch PC and bug fixes, is that
the store instruction accepts an address computed with immediate y,
using z instead for the stored value.
// Instruction Format
//
// Bit ranges of the fields of a 32-bit instruction word.  Note that
// IMM_BITS (15:0) overlaps IY_BITS (15:11): an instruction carries either
// a 16-bit immediate or a y register index, selected by QIMM_BIT.
//
`define QMODE_BITS      31:30
`define QOP_BITS        29:27
`define QIMM_BIT        26
`define IZ_BITS         25:21
`define IX_BITS         20:16
`define IY_BITS         15:11
`define IMM_BITS        15:0
`define IMM_SIGN_BIT    15

// Instruction Format: Major Operating Mode
//
// Values of the QMODE_BITS field.  With this encoding, bit 31 is set
// exactly for STORE and BRANCH, and bit 30 exactly for FETCH and BRANCH,
// which is what the two *_BIT shortcuts below select.
//
`define QMODE_ARITH     0
`define QMODE_FETCH     1
`define QMODE_STORE     2
`define QMODE_BRANCH    3
`define QMODE_STORE_OR_BRANCH_BIT 31
`define QMODE_FETCH_OR_BRANCH_BIT 30

// Instruction Format: ALU Operations
//
// Values of the QOP_BITS field.  Value 7 is not assigned.
`define QOP_AND     0
`define QOP_OR      1
`define QOP_XOR     2   // XOR with -1 for NOT.
`define QOP_SL      3
`define QOP_ADD     4
`define QOP_RSUB    5   // Reversed args for maximum flexibility.
`define QOP_MULT    6

// Instruction Format: Branch Condition
//
// These overlap with QOP_BITS, which is OK since the ALU is not used when
// branching.
//
`define BNOT_BIT        29    // negates branch condition
`define BZERO_BIT       28    // branch if zero (can combine with BNEG_BIT)
`define BNEG_BIT        27    // branch if negative
//
// The branch conditions {BNOT, BZERO, BNEG} can be expanded to:
//
//   3'b000 noop                        3'b100 branch always (jump)
//   3'b001 branch if negative          3'b101 branch if non-negative
//   3'b010 branch if zero              3'b110 branch if non-zero
//   3'b011 branch if non-positive      3'b111 branch if positive

// Miscellaneous constants.
//
// IREG_BITBUCKET is not referenced by any module in the visible source.
// START_ADDR is the base for the program counter reset value.
// SAFE_INSN is the instruction word loaded into the pipeline instruction
// registers on reset; all-zero decodes as QMODE_ARITH / QOP_AND with
// register indices 0 and no immediate.
`define IREG_BITBUCKET  15
`define START_ADDR      0
`define SAFE_INSN       32'h00000000


// The Top-Level CPU Module
//
// Instantiates the four pipeline stage modules and wires them together.
// Signal prefixes s1_..s4_ name the stage that produces the signal.
// The only external interface besides clock and reset is the program
// upload port, which is passed straight through to the fetch stage.
//
module ogmisc(clock, reset_, do_upload, upload_addr, upload_data);
    input clock;
    //> Active-low reset, used asynchronously by the stages.
    input reset_;

    //> Program upload port; see ogmisc_stg1_fetch for the semantics.
    input do_upload;
    input[8:0] upload_addr;
    input[31:0] upload_data;

// Instruction word as it travels down the pipeline.
wire[31:0] s1_insn, s2_insn, s3_insn;
// PC output of the fetch stage.
wire[8:0] s1_pc;
// Operands produced by the register stage.
wire[31:0] s2_x;
wire[31:0] s2_y, s2_store_val;
// Store value forwarded through the ALU stage.
wire[31:0] s3_store_val;
// Write-back bus from stage 4 into the register file in stage 2.
wire[4:0] s4_wb_reg;
wire[31:0] s3_z, s4_wb_val;
wire s4_wb_enable;

// Stage 1 receives the branch condition bits of the instruction in stage 2
// together with the stage 2 operands x (tested value) and y (target).
ogmisc_stg1_fetch stg1(
    clock, reset_,
    do_upload, upload_addr, upload_data,
    s1_insn, s1_pc,
    s2_insn[`BNOT_BIT], s2_insn[`BZERO_BIT], s2_insn[`BNEG_BIT], s2_x, s2_y);

// NOTE(review): clock_2x and phase are neither declared nor driven in this
// module, so they become implicit undriven 1-bit nets.  The register file
// in stg2 is clocked by clock_2x, so a source for both signals (extra
// ports or a clock generator) still has to be added.
ogmisc_stg2_regio stg2(
    .clock(clock), .reset_(reset_), .clock_2x(clock_2x), .phase(phase),
    .insn(s1_insn), .pc(s1_pc),
    .insn_o(s2_insn), .x_o(s2_x), .y_o(s2_y), .yz_o(s2_store_val),
    .wb_enable(s4_wb_enable), .wb_reg(s4_wb_reg), .wb_val(s4_wb_val));

// Positional ports: insn, x, y, yz in; insn_o, res_o, yz_o out.
ogmisc_stg3_alu stg3(
    clock, reset_,
    s2_insn, s2_x, s2_y, s2_store_val,
    s3_insn, s3_z, s3_store_val);

// Positional ports: insn, val_or_addr, store_val in; write-back bus out.
ogmisc_stg4_memio stg4(
    clock, reset_,
    s3_insn, s3_z, s3_store_val,
    s4_wb_enable, s4_wb_reg, s4_wb_val);

endmodule


// Stage 1: Instruction Fetch
//
// Owns the program counter and the program memory.  Port A of the block
// RAM reads the word addressed by next_pc on every rising clock edge;
// port B is used only to upload program words.  The branch decision uses
// the condition bits and operands of the instruction currently in stage 2.
//
module ogmisc_stg1_fetch(clock, reset_, do_upload, upload_addr, upload_data,
                         insn_o, pc_o,
                         s2_bnot, s2_bzero, s2_bneg, s2_x, s2_y);
    input clock;
    //> Active-low asynchronous reset.
    input reset_;

    //> On rising edge where a[do_upload] is non-zero, a[upload_data] is
    //  stored at a[upload_addr] in program memory.
    input do_upload;
    input[8:0] upload_addr;
    input[31:0] upload_data;

    //> The current instruction.
    output[31:0] insn_o;
    //> The address just after the current instruction.
    output[8:0] pc_o;

    //> On rising edge when the instruction at stage 2 is a branch, a
    //  conditional branch is done.  If a[s2_bnot] is 0, the condition
    //  is that a[s2_bzero] is 1 and a[s2_x] is zero or a[s2_bneg] is 1 and
    //  a[s2_x] is negative.  If a[s2_bnot] is 1, the condition is negated.
    input s2_bnot;
    input s2_bzero;
    input s2_bneg;
    //> Test register for branch.
    input[31:0] s2_x;
    //> The target address for branch.  Only the low 9 bits are used.
    input[31:0] s2_y;

// Set one cycle after a branch instruction appears on insn_o, i.e. while
// that instruction is held in the stage 2 instruction register.
reg s2_insn_is_branch;

wire is_zero = s2_x == 0;
wire is_neg = s2_x[31];

// Un-negated branch condition: any enabled test matches.  With no test
// enabled this is 0, so {bnot,bzero,bneg} = 3'b000 never branches and
// 3'b100 always branches, as listed in the branch condition table.
wire cond_met = (s2_bzero && is_zero) || (s2_bneg && is_neg);
wire do_branch = s2_insn_is_branch && (s2_bnot != cond_met);

reg [8:0] pc_o;
// Address fetched at the coming clock edge.  The program memory has 512
// words, hence the explicit truncation of the branch target.
wire [8:0] next_pc = do_branch ? s2_y[8:0] : pc_o;

// NOTE(review): while reset_ is low, next_pc equals START_ADDR - 1 and
// port A stays enabled, so the word at that address shows up on insn_o
// before the word at START_ADDR does.  Confirm that this is intended.
always @(posedge clock or negedge reset_)
    if (!reset_) begin
        pc_o <= `START_ADDR - 1;
        s2_insn_is_branch <= 0;
    end else begin
        pc_o <= next_pc + 1;
        s2_insn_is_branch <= insn_o[`QMODE_BITS] == `QMODE_BRANCH;
    end

// Program memory.  Port A: read-only instruction fetch.  Port B: write-only
// upload port (its data output is left unconnected).
RAMB16_S36_S36 program_memory (
    .CLKA(clock), .SSRA(1'b0),
    .ADDRA(next_pc),
    .ENA(1'b1),         .DOA(insn_o),         .DOPA(),
    .WEA(1'b0),         .DIA(32'b0),            .DIPA(4'b0),

    .CLKB(clock), .SSRB(1'b0),
    .ADDRB(upload_addr),
    .ENB(1'b1),         .DOB(),                 .DOPB(),
    .WEB(do_upload),    .DIB(upload_data),      .DIPB(4'b0));

endmodule


// Stages 2, 5: Register Fetch and Write-Back
//
// Holds the 32 x 32-bit register file.  The register file is clocked by
// clock_2x and alternates between a write-back slot (phase == 1) and an
// operand-read slot (phase == 0), so that the write-back of an older
// instruction and the operand fetch of the incoming one share one module.
//
module ogmisc_stg2_regio(clock, reset_, clock_2x, phase,
                         insn, pc,
                         insn_o, x_o, y_o, yz_o,
                         wb_enable, wb_reg, wb_val);
    input clock;
    //> Active-low asynchronous reset; only a[insn_o] is reset.
    input reset_;
    //> Clock of the register file.  Presumably twice the frequency of
    //  a[clock], as the name suggests -- TODO confirm against the clock
    //  generator, which is not part of this file.
    input clock_2x;
    //> Selects the register file slot on each rising edge of a[clock_2x]:
    //  1 for write-back, 0 for operand read.
    input phase;

    //> Instruction and PC from the fetch stage.
    input[31:0] insn;
    input[8:0] pc;

    //> The instruction, delayed by one rising edge of a[clock].
    output[31:0] insn_o;
    //> Register r_ix, or a[pc] + 1 when the incoming instruction is a branch.
    output[31:0] x_o;
    //> Sign-extended immediate if the immediate bit of a[insn_o] is set,
    //  else a[yz_o].
    output[31:0] y_o;
    //! r_iy if not immediate, else r_iz.
    output[31:0] yz_o;

    //> Write-back port: when a[wb_enable] is set in a write-back slot,
    //  a[wb_val] is written to register a[wb_reg].
    input wb_enable;
    input[4:0] wb_reg;
    input[31:0] wb_val;

reg[31:0] x_o, yz_o;
reg[31:0] insn_o;
wire[31:0] y_out_if_imm;

// Register indices are decoded from the incoming (not yet registered)
// instruction, so the operands line up with insn_o.
wire[4:0] ix = insn[`IX_BITS];
// With an immediate, the y field is part of the immediate, so the second
// read port fetches r_iz instead.
wire[4:0] iyz = insn[`QIMM_BIT]? insn[`IZ_BITS] : insn[`IY_BITS];

always @(posedge clock or negedge reset_)
    if (!reset_) insn_o <= `SAFE_INSN;
    else         insn_o <= insn;

// The register file and the operand registers have no reset.
reg[31:0] regfile[0:31];
always @(posedge clock_2x)
    if (phase == 1) begin // falling edge of clock (right?)
        if (wb_enable)
            regfile[wb_reg] <= wb_val;
    end else begin
        // NOTE(review): for a branch, x_o carries pc + 1 instead of r_ix.
        // The top level also feeds x_o to the fetch stage as the value
        // tested by the branch condition, so a branch cannot currently
        // test a register.  Confirm the intended source of the test value.
        if (insn[`QMODE_BITS] == `QMODE_BRANCH)
            x_o <= pc + 1;
        else
            x_o <= regfile[ix];
        yz_o <= regfile[iyz];
    end

// The immediate is sign-extended from 16 to 32 bits.  The selection uses
// the registered instruction, matching the timing of x_o and yz_o.
assign y_out_if_imm = {{16{insn_o[`IMM_SIGN_BIT]}}, insn_o[`IMM_BITS]};
assign y_o = insn_o[`QIMM_BIT]? y_out_if_imm : yz_o;

endmodule


// Stage 3: ALU
//
// Computes the ALU result for arithmetic and fetch instructions and passes
// x through unchanged for store and branch instructions.  The instruction
// word and the store value are forwarded to the next stage alongside the
// result.
//
module ogmisc_stg3_alu(clock, reset_, insn, x, y, yz, insn_o, res_o, yz_o);
    input clock;
    //> Active-low asynchronous reset; only a[insn_o] is reset.
    input reset_;

    //> Instruction and operands from the register stage.
    input[31:0] insn;
    input[31:0] x;
    input[31:0] y;
    //> Value forwarded unchanged to a[yz_o].
    input[31:0] yz;

    //> The instruction, delayed by one rising edge of a[clock].
    output[31:0] insn_o;
    //> ALU result, or a[x] for store and branch instructions.
    output[31:0] res_o;
    //> a[yz], delayed by one rising edge of a[clock].
    output[31:0] yz_o;

reg[31:0] res_o, yz_o;
reg[31:0] insn_o;

always @(posedge clock or negedge reset_)
    if (!reset_)
        insn_o <= `SAFE_INSN;
    else begin
        // Forward the instruction to the next stage.  Without this the
        // register would keep its reset value forever and stage 4 would
        // never see a real instruction.
        insn_o <= insn;
        yz_o <= yz;
        if (insn[`QMODE_STORE_OR_BRANCH_BIT]) begin
            // Store and branch bypass the ALU.  For branches the op field
            // holds the branch condition bits, not an ALU operation.
            res_o <= x;
        end else case (insn[`QOP_BITS])
            `QOP_AND:  res_o <= x & y;
            `QOP_OR:   res_o <= x | y;
            `QOP_XOR:  res_o <= x ^ y;
            `QOP_SL:   res_o <= x << y;// FIXME. Can we have signed y?
            `QOP_ADD:  res_o <= y + x; // FIXME. Explicit instantiate ADDSUB
            `QOP_RSUB: res_o <= y - x; // ...
            `QOP_MULT: res_o <= x * y; // FIXME. Explicit instantiate.
            default: ;                 // Op 7 is undefined; res_o is held.
        endcase
    end

endmodule


// Stage 4: The Memory Access Stage
//
// Performs loads from and stores to the local data memory and produces
// the write-back bus for the register file.  For a fetch the write-back
// value is the memory read data; for every other instruction it is the
// incoming value, delayed by one clock.
//
module ogmisc_stg4_memio(clock, reset_, insn, val_or_addr, store_val,
                         wb_enable_o, wb_reg_o, wb_val_o);
    input clock;
    //> Not used by this stage.
    input reset_;

    //> Instruction from the ALU stage.
    input[31:0] insn;
    //> ALU result.  Its low 9 bits address the local memory; the whole
    //  value is the write-back value for instructions other than fetch.
    input[31:0] val_or_addr;
    //> Data written to local memory by a store.
    input[31:0] store_val;

    //> Write-back bus.  Enabled for every instruction except a store; the
    //  destination is the z register field of the instruction.
    output wb_enable_o;
    output[4:0] wb_reg_o;
    output[31:0] wb_val_o;

reg wb_enable_o;
reg[4:0] wb_reg_o;

// Decode the major mode of the incoming instruction once.
wire[1:0] mode = insn[`QMODE_BITS];
wire store_now = (mode == `QMODE_STORE);
wire fetch_now = (mode == `QMODE_FETCH);

// Registered copies that line up with the memory read data, which
// appears after the same clock edge.
reg fetch_q;
reg[31:0] passthru_q;
wire[31:0] mem_q;

always @(posedge clock) begin
    wb_reg_o <= insn[`IZ_BITS];
    wb_enable_o <= ~store_now;
    passthru_q <= val_or_addr;
    fetch_q <= fetch_now;
end

assign wb_val_o = fetch_q ? mem_q : passthru_q;

// Local data memory.  Port A carries both loads and stores; port B is
// tied off and unused.
RAMB16_S36_S36 local_memory(
    .CLKA(clock),
    .SSRA(1'b0),
    .ENA(1'b1),
    .WEA(store_now),
    .ADDRA(val_or_addr[8:0]),
    .DIA(store_val),
    .DIPA(4'b0),
    .DOA(mem_q),
    .DOPA(),

    .CLKB(1'b0),
    .SSRB(1'b0),
    .ENB(1'b0),
    .WEB(1'b0),
    .ADDRB(9'b0),
    .DIB(32'b0),
    .DIPB(4'b0),
    .DOB(),
    .DOPB());

endmodule
_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)

Reply via email to