why unnecessary variable initialization is good with gcc -O3 and bad with just gcc

sh . vipin Sat, 02 Aug 2008 06:35:17 -0700

Below is a piece of fully compilable bit manipulation code. Let's say
we have two versions of the same code
1. variable "b" in "eval_output_out" is initialized
2. variable "b" in "eval_output_out" is NOT initialized


The code expands in such a way that it is independent of the past value
of variable "b". I dumped the preprocessed output of the two versions,
and the only difference was that b is initialized in one and not in the
other.

QUESTION: why does version 1 run faster than version 2 with gcc's -O3 option?

If I just dump the assembly of the two versions using "gcc -S" (i.e.
without any optimization), the behavior is as expected: version
#1 (with initialization) has one extra instruction to initialize the
variable.

But when I dump the assembly of the two versions with "gcc -O3 -S",
the assembly of version 2 (without initialization) has 4 extra
instructions.

Can somebody please explain to me why the unnecessary initialization of
the variable with "0" helps with the -O3 option, even though the macros
never check whether the previous value of the variable is "0" or any
other value?

I have attached the fully compilable source code and the assembly of
the two versions of the code.

ASSEMBLY OF VERSION #1 [ WITH INITIALIZATION]
------------------------------------------------
/* gcc -O3 output for the variant in which "b" is initialized to 0.
 * NOTE(review): the table is called width_masks here but WM in the C
 * source below; presumably the same array under a different name. */
.globl eval_output_out
        .type   eval_output_out,@function
eval_output_out:
        pushl   %ebp
        /* load the 32-bit word at width_masks+4 */
        movl    width_masks+4, %edx
        movl    %esp, %ebp
        /* first stack argument (the pointer passed to the function) */
        movl    8(%ebp), %eax
        movl    %edx, %ecx
        notl    %edx
        /* word at offset 4 of the argument, with the mask bits cleared */
        andl    4(%eax), %edx
        /* word at offset 0 of the argument, masked */
        andl    (%eax), %ecx
        movl    %edx, 4(%eax)
        sall    $2, %ecx
        /* the mask is loaded from memory a second time */
        andl    width_masks+4, %ecx
        orl     %ecx, %edx
        movl    %edx, 4(%eax)
        leave
        ret


ASSEMBLY OF VERSION #2 [ WITHOUT INITIALIZATION]
------------------------------------------------
/* gcc -O3 output for the variant in which "b" is NOT initialized.
 * NOTE(review): the table is called width_masks here but WM in the C
 * source below; presumably the same array under a different name. */
.globl eval_output_out
        .type   eval_output_out,@function
eval_output_out:
        /* load the 32-bit word at width_masks+4 */
        movl    width_masks+4, %edx
        pushl   %ebp
        /* eax = edx * 4, i.e. the mask shifted left by 2 */
        leal    0(,%edx,4), %eax
        movl    %esp, %ebp
        notl    %eax
        /* save %ebx on the stack */
        pushl   %ebx
        /* first stack argument (the pointer passed to the function) */
        movl    8(%ebp), %ecx
        /* %ebx is read here although this function has not written it:
         * its incoming value takes part in the computation */
        andl    %eax, %ebx
        movl    %edx, %eax
        andl    (%ecx), %eax
        notl    %edx
        andl    4(%ecx), %edx
        sall    $2, %eax
        movl    %edx, 4(%ecx)
        orl     %eax, %ebx
        /* the mask is loaded from memory a second time */
        andl    width_masks+4, %ebx
        orl     %ebx, %edx
        movl    %edx, 4(%ecx)
        /* restore the saved %ebx */
        movl    (%esp), %ebx
        leave
        ret

SOURCE CODE
------------------------------------


/* Width masks: WM[n] has its low (n + 1) bits set, so WM[bc - 1] is the
 * mask for a field that is bc bits wide (1 <= bc <= 32).
 * The table is read-only, hence const. */
static const unsigned int WM[32] = {
        0x1,        0x3,        0x7,        0xF,
        0x1F,       0x3F,       0x7F,       0xFF,
        0x1FF,      0x3FF,      0x7FF,      0xFFF,
        0x1FFF,     0x3FFF,     0x7FFF,     0xFFFF,
        0x1FFFF,    0x3FFFF,    0x7FFFF,    0xFFFFF,
        0x1FFFFF,   0x3FFFFF,   0x7FFFFF,   0xFFFFFF,
        0x1FFFFFF,  0x3FFFFFF,  0x7FFFFFF,  0xFFFFFFF,
        0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
};


/* Model under test: one word of input bits and one word of output bits. */
typedef struct _top_test_model {
        unsigned int inputs[1];   /* read by eval_output_out() */
        unsigned int outputs[1];  /* written by eval_output_out() */
} top_test_model;

/* Logical right shift of x by y bits. Yields 0 when y >= 32 instead of
 * performing the shift (assumes 32-bit operands, matching the 32-entry
 * WM table). */
#define LRS(x,y)        ( ((y)>=32)? 0x0: (x)>>(y) )

/* NAMING CONVENTION OF MACRO PARAMETERS
 *   iarr - integer array
 *   idx - index in array of integers
 *   sbp - starting bit position in variable to which operation is
 *         applied
 *   bc - bit count, number of bits to be operated upon
 *   WM - common purpose width mask array defined above
 *
 * All macros below evaluate some of their arguments more than once, so
 * arguments must not have side effects. The left shifts by sbp are not
 * guarded: sbp must be less than the width of unsigned int.
 */


/* Get the value of "bc" bits starting from bit position "sbp" in
 * variable "i". Variable "i" remains unchanged. */
#define get_var_bits(i, sbp, bc) \
        (LRS(((WM[(bc) - 1] << (sbp)) & (i)), (sbp)))

/* Reset "bc" bits in variable "i" starting from bit position "sbp".
 * This is a read-modify-write of "i": the bits outside the field keep
 * their previous value, so "i" must already hold a defined value. */
#define rst_bits(i, sbp, bc) \
        ((i) &= ~(WM[(bc) - 1] << (sbp)))

/* Set "bc" bits from "val" in variable "i" starting from bit position
 * "sbp". "val" is shifted but not masked to "bc" bits. Like rst_bits,
 * this reads the previous value of "i" to preserve the other bits.
 * The field is cleared inside the assigned expression (no nested "&=")
 * so that "i" is modified exactly once per use. */
#define set_var_bits(i, sbp, bc, val) \
        ((i) = ((i) & ~(WM[(bc) - 1] << (sbp))) | ((val) << (sbp)))


/* Same as the macros above, except that they work on "iarr[idx]"
 * instead of variable "i". */
#define get_arr_var_bits(iarr, idx, sbp, bc) \
        (LRS(((WM[(bc) - 1] << (sbp)) & (iarr)[idx]), (sbp)))
#define rst_arr_var_bits(iarr, idx, sbp, bc) \
        ((iarr)[idx] &= ~(WM[(bc) - 1] << (sbp)))
#define set_arr_var_bits(iarr, idx, sbp, bc, val) \
        ((iarr)[idx] = ((iarr)[idx] & ~(WM[(bc) - 1] << (sbp))) \
                       | ((val) << (sbp)))

/*
 * Move a 2-bit field from model->inputs[0] to model->outputs[0] by way
 * of the local scratch word "b":
 *   1. bits 0..1 of inputs[0] are stored into bits 2..3 of b;
 *   2. bits 0..1 of b are stored into bits 0..1 of outputs[0].
 *
 * "b" has to start from a defined value: set_var_bits() keeps the bits
 * of "b" outside the target field, which means it reads the old "b".
 *
 * NOTE(review): step 2 reads bits 0..1 of b, which step 1 never sets,
 * so the field written to outputs[0] is always 0 here. Confirm whether
 * the read was meant to start at bit 2.
 */
void eval_output_out(top_test_model *model)
{
        unsigned int b = 0;
        unsigned int in_field;
        unsigned int b_field;

        in_field = get_arr_var_bits(model->inputs, 0, 0, 2);
        set_var_bits(b, 2, 2, in_field);

        b_field = get_var_bits(b, 0, 2);
        set_arr_var_bits(model->outputs, 0, 0, 2, b_field);
}


/*
 * Driver: runs eval_output_out() once on a zero-initialized model.
 *
 * The model is initialized because eval_output_out() reads inputs[0]
 * and read-modify-writes outputs[0]; passing an uninitialized struct
 * would make it operate on indeterminate values.
 */
int main (int argc, char **argv){
        top_test_model t = { {0}, {0} };

        /* command-line arguments are not used */
        (void)argc;
        (void)argv;

        eval_output_out(&t);
        return 0;
}

-vipin


_______________________________________________
help-gplusplus mailing list
help-gplusplus@gnu.org
http://lists.gnu.org/mailman/listinfo/help-gplusplus

why unnecessary variable initialization is good with gcc -O3 and bad with just gcc

Reply via email to