I worked in condition flags.
`include "defines.v"
// Convert left shift amount into one-hot so we can use the
// multiplier to compute a left shift
// Note: Economizes on hardware, but not necessarily on energy
module shift_one_hot(
    input clock,
    input [31:0] in,
    output reg [31:0] out);

    // Registered 5-to-32 decoder: one clock after `in` is presented, `out`
    // has exactly bit in[4:0] set, or is all zeros when the requested shift
    // distance does not fit in five bits (i.e. it is 32 or more).

    // True when any of the upper 27 bits of the shift amount is set.
    wire too_far = (in[31:5] != 27'd0);

    // One-hot encoding of the low five bits of the shift amount.
    wire [31:0] one_hot = 32'b1 << in[4:0];

    always @(posedge clock) begin
        if (too_far) begin
            out <= 32'd0;
        end else begin
            // An alternative worth benchmarking in synthesis is to clear
            // `out` and then set the single bit out[in[4:0]].  Both forms
            // describe the same decoder; keep whichever the tool maps better.
            out <= one_hot;
        end
    end
endmodule
// Right shift, spread over five stages, so that each stage has huge
// positive slack.
module int_right_shift(
    input clock,
    input [31:0] in,
    input [31:0] shift_amt,
    input arith_shift,
    output [31:0] out,
    output C_out);

    // Five-stage pipelined right shifter.
    //
    //   in          value to shift
    //   shift_amt   shift distance; any value >= 32 shifts everything out
    //   arith_shift 1 = arithmetic shift (fill with in[31]), 0 = logical
    //   out         shifted value, valid five clocks after the inputs
    //   C_out       last bit shifted out (0 when nothing was shifted)
    //
    // Each stage conditionally shifts by one power of two (16, 8, 4, 2, 1)
    // according to one bit of shift_amt[4:0] and forwards the remaining
    // lower shift bits to the next stage.  The carry always comes from the
    // most recent stage that actually shifted, because that stage drops the
    // bit that ends up just below the final result.

    // STAGE 1: detect oversize shifts, shift by 16
    reg zero, m1;           // zero: shift_amt >= 32; m1: fill bit for that case
    reg [31:0] val1;
    reg [3:0] sh1;
    reg as1, carry1;
    always @(posedge clock) begin
        zero <= 0;
        m1 <= 0;
        if (shift_amt[31:5] != 0) begin
            zero <= 1;
            m1 <= arith_shift && in[31];
        end
        if (shift_amt[4]) begin
            val1[31:16] <= {16{arith_shift && in[31]}};
            val1[15:0] <= in[31:16];
            carry1 <= in[15];
        end else begin
            val1 <= in;
            carry1 <= 0;
        end
        sh1 <= shift_amt[3:0];
        as1 <= arith_shift;
    end

    // STAGE 2: apply the oversize result, or shift by 8
    reg [31:0] val2;
    reg [2:0] sh2;
    reg as2, carry2;
    always @(posedge clock) begin
        if (zero) begin
            // Everything was shifted out: the result is all fill bits.  The
            // later stages leave an all-0 or all-1 word unchanged.
            val2 <= {32{m1}};
            // Drive the carry explicitly so it cannot hold a stale value
            // from an earlier operation.  The fill bit matches what the
            // later stages would report if they shift this word further.
            // NOTE(review): for a shift of exactly 32 the last bit shifted
            // out is in[31] even for a logical shift; confirm which carry
            // the ISA expects in that case.
            carry2 <= m1;
        end else if (sh1[3]) begin
            val2[31:24] <= {8{as1 && val1[31]}};
            val2[23: 0] <= val1[31:8];
            carry2 <= val1[7];
        end else begin
            val2 <= val1;
            carry2 <= carry1;
        end
        sh2 <= sh1[2:0];
        as2 <= as1;
    end

    // STAGE 3: shift by 4
    reg [31:0] val3;
    reg [1:0] sh3;
    reg as3, carry3;
    always @(posedge clock) begin
        if (sh2[2]) begin
            val3[31:28] <= {4{as2 && val2[31]}};
            val3[27: 0] <= val2[31:4];
            carry3 <= val2[3];
        end else begin
            val3 <= val2;
            carry3 <= carry2;
        end
        sh3 <= sh2[1:0];
        as3 <= as2;
    end

    // STAGE 4: shift by 2
    reg [31:0] val4;
    reg sh4;
    reg as4, carry4;
    always @(posedge clock) begin
        if (sh3[1]) begin
            val4[31:30] <= {2{as3 && val3[31]}};
            val4[29: 0] <= val3[31:2];
            carry4 <= val3[1];
        end else begin
            val4 <= val3;
            carry4 <= carry3;
        end
        sh4 <= sh3[0];
        as4 <= as3;
    end

    // STAGE 5: shift by 1
    reg [31:0] val5;
    reg carry5;
    always @(posedge clock) begin
        if (sh4) begin
            val5[31] <= as4 && val4[31];
            val5[30: 0] <= val4[31:1];
            carry5 <= val4[0];
        end else begin
            val5 <= val4;
            carry5 <= carry4;
        end
    end

    assign out = val5;
    assign C_out = carry5;
endmodule
// AND, OR, XOR, MOVE (useful for conditional move), and MUX
// Contrary to the diagram, this needs to be put at the TOP of the pipeline,
// so we're carrying one result instead of two input operands down the
// pipeline
module logical_ops(
    input clock,
    input predicate, // For MUX op
    input [2:0] logical_op,
    input [31:0] A,
    input [31:0] B,
    output reg [31:0] C);

    // Single-cycle registered bitwise unit.  On every rising clock edge C
    // takes the result selected by logical_op; any opcode not listed below
    // produces zero.
    //
    // NOTE(review): the INT_OP_* selectors are not declared in this module
    // and presumably come from defines.v.  If they are `define macros there,
    // each use below needs a leading backtick -- confirm against defines.v.
    always @(posedge clock) begin
        case (logical_op)
            INT_OP_AND:  C <= A & B;
            INT_OP_OR:   C <= A | B;
            INT_OP_XOR:  C <= A ^ B;
            // INT_OP_MIN and INT_OP_MAX are not implemented: they need a
            // comparison, which this unit does not have.
            INT_OP_MOVE: C <= A;                    // pass A through
            INT_OP_MUX:  C <= predicate ? B : A;    // B when predicate is set
            default:     C <= 0;
        endcase
    end
endmodule
// Integer add/sub spread over 6 pipeline stages, for huge positive slack
// Check me: For subtract, carry_in needs to be 1, and the carry_out will be
// 1 if there's no borrow.
// I had a hard time keeping track of the numbers. I probably have a
// typo somewhere.
module integer_addsub(
    input clock,
    input [31:0] A,
    input [31:0] B,
    input sub,
    input carry_in,
    output [31:0] out,
    output C_out,
    output V_out,
    output Z_out);

    // Six-stage pipelined 32-bit adder/subtractor with condition flags.
    //
    //   A, B      operands
    //   sub       1 = compute A + ~B + carry_in, 0 = compute A + B + carry_in
    //   carry_in  carry into bit 0 (drive 1 for a plain subtract)
    //   out       32-bit result, valid six clocks after the inputs
    //   C_out     carry out of bit 31 (for subtract: 1 means no borrow)
    //   V_out     signed overflow
    //   Z_out     result is zero
    //
    // Index convention: both operands get one extra low-order bit so the
    // carry-in can be injected by the adder itself (1 + carry_in at index 0
    // generates a carry of carry_in into index 1).  Result bit n therefore
    // lives at index n+1 of the C* registers, and index 33 is the carry out.
    // Every later stage repeats the trick: a constant 1 is paired with the
    // previous stage's carry below the slice being added, and the lowest
    // sum bit of each stage is discarded.
    wire [32:0] A0 = {A, 1'b1};
    // NOTE(review): the original author asked for this line to be
    // double-checked.
    wire [32:0] B0 = {(B ^ {32{sub}}), carry_in};

    // STAGE 1: 1's complement of B for subtract, add indices 2:0
    reg [32:3] A1, B1;
    reg [3:1] C1;           // C1[3] is the carry into index 3
    reg [1:0] sign1;        // {effective sign of second operand, sign of A}
    wire [3:0] sum1 = A0[2:0] + B0[2:0];
    always @(posedge clock) begin
        A1 <= A0[32:3];
        B1 <= B0[32:3];
        C1[3:1] <= sum1[3:1];
        sign1[0] <= A[31];
        // Sign of the second operand as seen by the overflow check.  For an
        // add this is simply B[31].  For a subtract it is the sign of -B:
        //   * B == 0: -B is 0, so the sign stays 0.
        //   * B == 0x80000000: -B is not representable.  With A negative the
        //     subtraction cannot overflow; with A non-negative it always
        //     does.  Both outcomes fall out of the V_out equation when -B is
        //     treated as positive (sign 0).
        //   * Any other B: the sign flips.
        // Hence the sign is 1 exactly when B is positive and non-zero.
        sign1[1] <= sub ? (!B[31] && B[31:0]!=0) : B[31];
    end

    // STAGE 2: add indices 8:3
    reg [32:9] A2, B2;
    reg [9:1] C2;
    reg [1:0] sign2;
    reg Z2;                 // all result bits produced so far are zero
    wire [9:2] sum2 = {A1[8:3], 1'b1} + {B1[8:3], C1[3]};
    always @(posedge clock) begin
        A2 <= A1[32:9];
        B2 <= B1[32:9];
        Z2 <= C1[2:1] == 0;
        C2[2:1] <= C1[2:1];
        C2[9:3] <= sum2[9:3];
        sign2 <= sign1;
    end

    // STAGE 3: add indices 14:9
    reg [32:15] A3, B3;
    reg [15:1] C3;
    reg [1:0] sign3;
    reg Z3;
    wire [15:8] sum3 = {A2[14:9], 1'b1} + {B2[14:9], C2[9]};
    always @(posedge clock) begin
        A3 <= A2[32:15];
        B3 <= B2[32:15];
        Z3 <= C2[8:3] == 0 && Z2;
        C3[8:1] <= C2[8:1];
        C3[15:9] <= sum3[15:9];
        sign3 <= sign2;
    end

    // STAGE 4: add indices 20:15
    reg [32:21] A4, B4;
    reg [21:1] C4;
    reg [1:0] sign4;
    reg Z4;
    wire [21:14] sum4 = {A3[20:15], 1'b1} + {B3[20:15], C3[15]};
    always @(posedge clock) begin
        A4 <= A3[32:21];
        B4 <= B3[32:21];
        // Fold in the bits finalised by stage 3.
        Z4 <= C3[14:9] == 0 && Z3;
        C4[14:1] <= C3[14:1];
        C4[21:15] <= sum4[21:15];
        sign4 <= sign3;
    end

    // STAGE 5: add indices 26:21
    reg [32:27] A5, B5;
    reg [27:1] C5;
    reg [1:0] sign5;
    reg Z5;
    wire [27:20] sum5 = {A4[26:21], 1'b1} + {B4[26:21], C4[21]};
    always @(posedge clock) begin
        A5 <= A4[32:27];
        B5 <= B4[32:27];
        Z5 <= C4[20:15] == 0 && Z4;
        C5[20:1] <= C4[20:1];
        C5[27:21] <= sum5[27:21];
        sign5 <= sign4;
    end

    // STAGE 6: add indices 32:27; index 33 is the carry out
    reg [33:1] C6;
    reg [1:0] sign6;
    reg Z6;
    wire [33:26] sum6 = {A5[32:27], 1'b1} + {B5[32:27], C5[27]};
    always @(posedge clock) begin
        Z6 <= C5[26:21] == 0 && Z5;
        C6[26:1] <= C5[26:1];
        // Take the top slice from this stage's own adder.
        C6[33:27] <= sum6[33:27];
        sign6 <= sign5;
    end

    assign out = C6[32:1];
    assign C_out = C6[33];
    // Overflow: both operands non-negative but the result is negative, or
    // both negative but the result is non-negative.
    assign V_out = (sign6==0 && C6[32]) || (sign6==3 && !C6[32]);
    // Z6 covers result indices 26:1; the final slice is checked here.
    assign Z_out = C6[32:27] == 0 && Z6;
endmodule
--
Timothy Normand Miller, PhD
Assistant Professor of Computer Science, Binghamton University
http://www.cs.binghamton.edu/~millerti/
Open Graphics Project
_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)