I worked in condition flags.

`include "defines.v"



// One-hot encoder for a left-shift amount.  The encoded value can be fed
// to the multiplier, so a multiply performs the left shift.
// Note:  This economizes on hardware, but not necessarily on energy.
//
// Ports:
//   clock - out is registered on the rising edge
//   in    - requested shift amount
//   out   - 1 << in[4:0] when in[31:5] is all zero, otherwise 0
module shift_one_hot(
    input clock,
    input [31:0] in,
    output reg [31:0] out);

// Set when any bit above the 5-bit shift field is set, i.e. the requested
// shift is 32 or more and no bit of the word would survive.
wire too_far = (0 != in[31:5]);

always @(posedge clock) begin
    if (too_far) begin
        out <= 32'd0;
    end else begin
        // Plain 5-to-32 decoder written as a shift.
        // An alternative worth testing in synthesis:
        //   out <= 0;
        //   out[in[4:0]] <= 1;
        // Both describe the same decoder, but the synthesizer may map
        // them differently, so the better result should be kept.
        out <= 32'd1 << in[4:0];
    end
end

endmodule




// Right shift, spread over five pipeline stages so that each stage has
// huge positive slack.  Each stage conditionally shifts by one power of
// two (16, 8, 4, 2, 1), selected by the matching bit of the shift amount.
//
// Ports:
//   clock       - all stages register on the rising edge
//   in          - value to shift
//   shift_amt   - shift distance; any value >= 32 shifts everything out
//   arith_shift - 1: fill vacated bits with in[31]; 0: fill with zeros
//   out         - shifted value, valid five clocks after the inputs
//   C_out       - last bit shifted out (0 when shift_amt is 0); for
//                 shift_amt >= 32 it equals the fill bit
module int_right_shift(
    input clock,
    input [31:0] in,
    input [31:0] shift_amt,
    input arith_shift,
    output [31:0] out,
    output C_out);

// STAGE 1:  Shift by 16; detect shift amounts of 32 or more
reg zero, m1;           // zero: shift >= 32;  m1: fill bit for that case
reg [31:0] val1;
reg [3:0] sh1;          // remaining shift-amount bits 3:0
reg as1, carry1;

always @(posedge clock) begin
    zero <= 0;
    m1 <= 0;
    if (shift_amt[31:5] != 0) begin
        zero <= 1;
        m1 <= arith_shift && in[31];
    end

    if (shift_amt[4]) begin
        val1[31:16] <= {16{arith_shift && in[31]}};
        val1[15:0]  <= in[31:16];
        carry1 <= in[15];
    end else begin
        val1 <= in;
        carry1 <= 0;
    end

    sh1 <= shift_amt[3:0];
    as1 <= arith_shift;
end


// STAGE 2:  Shift by 8, or replace the value entirely when shift >= 32
reg [31:0] val2;
reg [2:0] sh2;          // remaining shift-amount bits 2:0
reg as2, carry2;

always @(posedge clock) begin
    if (zero) begin
        // Everything is shifted out: the result is all fill bits.
        // The carry is driven here as well; leaving it unassigned would
        // let a stale value from an earlier operation reach C_out.
        // NOTE(review): a logical shift by exactly 32 reports carry 0
        // here; some ISAs define it as in[31] -- confirm the intended
        // flag semantics.
        val2 <= {32{m1}};
        carry2 <= m1;
    end else if (sh1[3]) begin
        val2[31:24] <= {8{as1 && val1[31]}};
        val2[23: 0] <= val1[31:8];
        carry2 <= val1[7];
    end else begin
        val2 <= val1;
        carry2 <= carry1;
    end

    sh2 <= sh1[2:0];
    as2 <= as1;
end


// STAGE 3:  Shift by 4
reg [31:0] val3;
reg [1:0] sh3;          // remaining shift-amount bits 1:0
reg as3, carry3;

always @(posedge clock) begin
    if (sh2[2]) begin
        val3[31:28] <= {4{as2 && val2[31]}};
        val3[27: 0] <= val2[31:4];
        carry3 <= val2[3];
    end else begin
        val3 <= val2;
        carry3 <= carry2;
    end

    sh3 <= sh2[1:0];
    as3 <= as2;
end


// STAGE 4:  Shift by 2
reg [31:0] val4;
reg sh4;                // remaining shift-amount bit 0
reg as4, carry4;

always @(posedge clock) begin
    if (sh3[1]) begin
        val4[31:30] <= {2{as3 && val3[31]}};
        val4[29: 0] <= val3[31:2];
        carry4 <= val3[1];
    end else begin
        val4 <= val3;
        carry4 <= carry3;
    end

    sh4 <= sh3[0];
    as4 <= as3;
end


// STAGE 5:  Shift by 1
reg [31:0] val5;
reg carry5;

always @(posedge clock) begin
    if (sh4) begin
        val5[31]    <= as4 && val4[31];
        val5[30: 0] <= val4[31:1];
        carry5 <= val4[0];
    end else begin
        val5 <= val4;
        carry5 <= carry4;
    end
end


assign out = val5;
assign C_out = carry5;

endmodule



// AND, OR, XOR, MOVE (useful for conditional move), and MUX
// Contrary to the diagram, this needs to be put at the TOP of the pipeline,
// so we're carrying one result instead of two input operands down the
// pipeline.
module logical_ops(
    input clock,
    input predicate,  // For MUX op
    input [2:0] logical_op,
    input [31:0] A,
    input [31:0] B,
    output reg [31:0] C);

// Registered bitwise unit.  On every rising clock edge C receives the
// result selected by logical_op; any opcode not listed below yields 0.
//
// NOTE(review): the INT_OP_* names are not declared in this module;
// presumably they come from defines.v.  If they are `define macros they
// would need a backtick prefix here -- confirm against defines.v.
always @(posedge clock) begin
    case (logical_op)
        INT_OP_AND:  C <= A & B;
        INT_OP_OR:   C <= A | B;
        INT_OP_XOR:  C <= A ^ B;
        // INT_OP_MIN and INT_OP_MAX are not implemented: they would
        // require a comparison.
        INT_OP_MOVE: C <= A;
        INT_OP_MUX:  C <= predicate ? B : A;   // predicate picks B over A
        default:     C <= 0;
    endcase
end

endmodule



// Integer add/sub spread over 6 pipeline stages, for huge positive slack.
// Check me:  For subtract, carry_in needs to be 1, and the carry_out will
//            be 1 if there's no borrow.
// I had a hard time keeping track of the numbers.  I probably have a
// typo somewhere.
// Pipelined 32-bit adder/subtractor with condition flags.
//
// Computes A + (sub ? ~B : B) + carry_in, six result bits per stage
// (two in the first stage).  Results appear six clocks after the inputs.
//
// Ports:
//   clock    - all stages register on the rising edge
//   A, B     - operands
//   sub      - 1: B is inverted (one's complement) before the add
//   carry_in - carry into bit 0; use 1 with sub=1 for a plain A - B
//   out      - 32-bit result
//   C_out    - carry out of bit 31 (for subtract: 1 means no borrow)
//   V_out    - signed overflow
//   Z_out    - 1 when out is all zeros
//
// Internal bit numbering is offset by one: index 0 is a dummy position
// used to inject the carry, and indices 32:1 hold data bits 31:0.
module integer_addsub(
    input clock,
    input [31:0] A,
    input [31:0] B,
    input sub,
    input carry_in,
    output [31:0] out,
    output C_out,
    output V_out,
    output Z_out);

// Appending 1 to A and carry_in to B makes the add of position 0
// generate a carry into position 1 exactly when carry_in is set.
wire [32:0] A0 = {A, 1'b1};
wire [32:0] B0 = {(B ^ {32{sub}}), carry_in};

// STAGE 1:  1's complement, add positions 2:0 (data bits 1:0)
reg [32:3] A1, B1;
reg [3:1] C1;           // C1[2:1]: result bits;  C1[3]: carry to stage 2
reg [1:0] sign1;        // {sign of effective B operand, sign of A}
wire [3:0] sum1 = A0[2:0] + B0[2:0];    // 4
always @(posedge clock) begin
    A1 <= A0[32:3];
    B1 <= B0[32:3];

    C1[3:1] <= sum1[3:1];

    sign1[0] <= A[31];
    // Sign of the operand that is actually added, i.e. of B ^ {32{sub}}.
    // Overflow of A + Bx + carry_in occurs exactly when A and Bx have the
    // same sign and the result's sign differs, for either carry_in value.
    // For sub with carry_in=1 this gives the same V_out as treating B==0
    // and B==0x80000000 as special cases, without a 32-bit comparison,
    // and it is also correct for subtract-with-borrow (carry_in=0).
    sign1[1] <= B[31] ^ sub;
end


// STAGE 2:  Add positions 8:3
reg [32:9] A2, B2;
reg [9:1] C2;
reg [1:0] sign2;
reg Z2;                 // result positions 2:1 are zero
wire [9:2] sum2 = {A1[8:3], 1'b1} + {B1[8:3], C1[3]};  // 8
always @(posedge clock) begin
    A2 <= A1[32:9];
    B2 <= B1[32:9];

    Z2 <= C1[2:1] == 0;
    C2[2:1] <= C1[2:1];
    C2[9:3] <= sum2[9:3];

    sign2 <= sign1;
end


// STAGE 3:  Add positions 14:9
reg [32:15] A3, B3;
reg [15:1] C3;
reg [1:0] sign3;
reg Z3;                 // result positions 8:1 are zero
wire [15:8] sum3 = {A2[14:9], 1'b1} + {B2[14:9], C2[9]}; // 8
always @(posedge clock) begin
    A3 <= A2[32:15];
    B3 <= B2[32:15];

    Z3 <= C2[8:3] == 0 && Z2;
    C3[8:1]  <= C2[8:1];
    C3[15:9] <= sum3[15:9];

    sign3 <= sign2;
end


// STAGE 4:  Add positions 20:15
reg [32:21] A4, B4;
reg [21:1] C4;
reg [1:0] sign4;
reg Z4;                 // result positions 14:1 are zero
wire [21:14] sum4 = {A3[20:15], 1'b1} + {B3[20:15], C3[15]};  // 8
always @(posedge clock) begin
    A4 <= A3[32:21];
    B4 <= B3[32:21];

    Z4 <= C3[14:9] == 0 && Z3;
    C4[14:1]  <= C3[14:1];
    C4[21:15] <= sum4[21:15];

    sign4 <= sign3;
end


// STAGE 5:  Add positions 26:21
reg [32:27] A5, B5;
reg [27:1] C5;
reg [1:0] sign5;
reg Z5;                 // result positions 20:1 are zero
wire [27:20] sum5 = {A4[26:21], 1'b1} + {B4[26:21], C4[21]};  // 8
always @(posedge clock) begin
    A5 <= A4[32:27];
    B5 <= B4[32:27];

    Z5 <= C4[20:15] == 0 && Z4;
    C5[20:1] <= C4[20:1];
    C5[27:21] <= sum5[27:21];

    sign5 <= sign4;
end


// STAGE 6:  Add positions 32:27; position 33 is the carry out
reg [33:1] C6;
reg [1:0] sign6;
reg Z6;                 // result positions 26:1 are zero
wire [33:26] sum6 = {A5[32:27], 1'b1} + {B5[32:27], C5[27]};  // 8
always @(posedge clock) begin
    Z6 <= C5[26:21] == 0 && Z5;
    C6[26:1] <= C5[26:1];
    C6[33:27] <= sum6[33:27];

    sign6 <= sign5;
end


assign out = C6[32:1];
assign C_out = C6[33];
// Overflow: both operands non-negative but the result is negative, or
// both negative but the result is non-negative.
assign V_out = (sign6==0 && C6[32]) || (sign6==3 && !C6[32]);
assign Z_out = C6[32:27] == 0 && Z6;

endmodule


-- 
Timothy Normand Miller, PhD
Assistant Professor of Computer Science, Binghamton University
http://www.cs.binghamton.edu/~millerti/
Open Graphics Project
_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)

Reply via email to