GCC misses obvious optimizations here, even with optimization turned up
to the maximum (-O3):

unsigned long long x(unsigned lo, unsigned hi)
{
                        return ((unsigned long long)hi << 32) | lo;             
         
}

gcc -O3 -c -o a.o a.c; objdump -d a.o:

   0:   55                      push   %ebp
   1:   89 e5                   mov    %esp,%ebp
   3:   8b 4d 0c                mov    0xc(%ebp),%ecx
   6:   53                      push   %ebx
   7:   89 ca                   mov    %ecx,%edx
   9:   31 db                   xor    %ebx,%ebx
   b:   8b 4d 08                mov    0x8(%ebp),%ecx
   e:   31 c0                   xor    %eax,%eax
  10:   09 da                   or     %ebx,%edx
                                ^^^ %ebx is zero here, can be thrown out
  12:   09 c8                   or     %ecx,%eax
                                ^^^ %eax is zero here, can be replaced with mov %ecx, %eax
  14:   8b 1c 24                mov    (%esp),%ebx
  17:   c9                      leave  
  18:   c3                      ret    

After seeing this I am not sure whether GCC has a peephole optimizer, but if
it does, the following rules should be added:

or reg1, reg2 where reg1 contains 0 can be thrown out
or reg1, reg2 where reg2 contains 0 can be replaced with mov reg1, reg2
 and possibly further peephole optimized

After some manual rewrite the function shrinks significantly to:

55                      push   %ebp
89 e5                   mov    %esp,%ebp
8b 55 0c                mov    0xc(%ebp),%edx
53                      push   %ebx
8b 45 08                mov    0x8(%ebp),%eax
8b 1c 24                mov    (%esp),%ebx
c9                      leave   
c3                      ret     

CL<

Reply via email to