Below is a piece of fully compilable bit-manipulation code. Suppose we have two versions of the same code: 1. variable "b" in "eval_output_out" is initialized; 2. variable "b" in "eval_output_out" is NOT initialized.
The code expands such that it is independent of the past value of variable "b". I dumped the preprocessed output of the two versions, and the only difference was that "b" is initialized in one and not in the other. QUERY: why does version 1 run faster than version 2 with the -O3 option of gcc? If I just dump the assembly of the two versions using "gcc -S" (i.e. without any optimization), the behavior is as expected: version #1 (with initialization) has one extra instruction to initialize the variable. But when I dump the assembly of the two versions with "gcc -O3 -S", the assembly of version 2 (without initialization) takes 4 extra instructions. Can somebody please explain to me why the unnecessary initialization of the variable with "0" helps under the -O3 option, even though the macros never check whether the previous value of the variable is "0" or any other value? I have attached the fully compilable source code and the assembly of the two versions. ASSEMBLY OF VERSION #1 [WITH INITIALIZATION] ------------------------------------------------ .globl eval_output_out .type eval_output_out,@function eval_output_out: pushl %ebp movl width_masks+4, %edx movl %esp, %ebp movl 8(%ebp), %eax movl %edx, %ecx notl %edx andl 4(%eax), %edx andl (%eax), %ecx movl %edx, 4(%eax) sall $2, %ecx andl width_masks+4, %ecx orl %ecx, %edx movl %edx, 4(%eax) leave ret ASSEMBLY OF VERSION #2 [WITHOUT INITIALIZATION] ------------------------------------------------ .globl eval_output_out .type eval_output_out,@function eval_output_out: movl width_masks+4, %edx pushl %ebp leal 0(,%edx,4), %eax movl %esp, %ebp notl %eax pushl %ebx movl 8(%ebp), %ecx andl %eax, %ebx movl %edx, %eax andl (%ecx), %eax notl %edx andl 4(%ecx), %edx sall $2, %eax movl %edx, 4(%ecx) orl %eax, %ebx andl width_masks+4, %ebx orl %ebx, %edx movl %edx, 4(%ecx) movl (%esp), %ebx leave ret SOURCE CODE ------------------------------------ /* Width Masks*/ static unsigned int WM[32] = {0x1,0x3,0x7,0xF,0x1F,0x3F,0x7F,0xFF,0x1FF,0x3FF,0x7FF,0xFFF,0x1FFF, 
0x3FFF,0x7FFF,0xFFFF,0x1FFFF,0x3FFFF,0x7FFFF,0xFFFFF,0x1FFFFF,0x3FFFFF, 0x7FFFFF,0xFFFFFF,0x1FFFFFF,0x3FFFFFF,0x7FFFFFF,0xFFFFFFF,0x1FFFFFFF,0x3FFFFFFF,0x7FFFFFFF, 0xFFFFFFFF}; typedef struct _top_test_model{ unsigned int inputs[1]; unsigned int outputs[1]; }top_test_model; /* Logical Right Shift*/ #define LRS(x,y) ( ((y)>=32)? 0x0: (x)>>(y) ) /* NAMING CONVENTION OF MACRO PARAMETERS * iarr - integer array * idx - index in array of integers * sbp - starting bit position in variable to which operation is applied * bc - bit count, number of bits to be operated upon * WM - common purpose width mask array defined above */ /* get value of "bc" bits starting from bit position "sbp" in variable "i". variable "i" remains unchanged */ #define get_var_bits(i, sbp, bc) (LRS(((WM[bc-1] << (sbp)) & (i)), (sbp))) /* reset "bc" bits in variable "i" starting from bit position "sbp" */ #define rst_bits(i, sbp, bc) ((i) &= ~(WM[bc-1] << (sbp))) /* set "bc" bits from variable "val" in variable "i" starting from bit position "sbp" */ #define set_var_bits(i, sbp, bc, val) (i = (rst_bits(i, sbp, bc)) | ((val) << (sbp))) /* same as macros above except that they work upon "iarr[idx]" instead of variable "i" */ #define get_arr_var_bits(iarr, idx, sbp, bc) (LRS(((WM[bc-1] << sbp) & iarr[idx]),(sbp))) #define rst_arr_var_bits(iarr, idx, sbp, bc) (iarr[idx] &= ~(WM[bc-1] << (sbp))) #define set_arr_var_bits(iarr, idx, sbp, bc, val) (iarr[idx] = rst_arr_var_bits(iarr, idx, sbp, bc) | ((val) << (sbp))) void eval_output_out(top_test_model *model){ unsigned int b=0; set_var_bits(b,2,2,get_arr_var_bits(model->inputs,0,0,2)); set_arr_var_bits(model->outputs,0,0,2,get_var_bits(b,0,2)); } int main (int argc, char *argv){ top_test_model t; eval_output_out(&t); } -vipin _______________________________________________ help-gplusplus mailing list help-gplusplus@gnu.org http://lists.gnu.org/mailman/listinfo/help-gplusplus