Am Freitag 15 Oktober 2004 14:41 schrieb Paulo Marques:
> karsten wiese wrote:
> > UHCI_NUMFRAMES will never change, it is a standard.
>
> Ok, then
>
> > I'll recheck with gcc 3.4 of fc3. I guess it'll stay the
> > same. Anyhow this is interrupt level, time critical code so
> > I think the patch is the right way to go.
>
> I just checked this code:
>
> unsigned int test(void)
> {
>    return 10;
> }
>
> int main(int argc, char *argv[])
> {
>    unsigned int a;
>
>    a = test();
>
>    printf("%d\n", a % 8);
>
>    return 0;
> }
>
> The "test" function is just to not let the compiler optimize away
> everything because it knows the value of "a". (Although, if it were a
> little smarter, it could figure out even that :)
>
> With gcc 3.3.2 it produces this output:
>
>       .type   main, @function
> main:
>       pushl   %ebp
>       movl    %esp, %ebp
>       pushl   %ecx
>       pushl   %ecx
>       andl    $-16, %esp
>       call    test
>       andl    $7, %eax
>          ^^^^^^^^^^^^^^^^
>       pushl   %edx
>       pushl   %edx
>       pushl   %eax
>       pushl   $.LC0
>       call    printf
>       xorl    %eax, %eax
>       leave
>       ret
>
> So it works as expected...
>
In uhci-hcd.ko, gcc 3.3.2 (I changed the Makefile to use gcc 3.3.2) shows one spot where it
optimizes by using the "and" operation, and two other spots where it fails to use the "and"
operation:

(gdb) disassemble uhci_insert_td_frame_list
Dump of assembler code for function uhci_insert_td_frame_list:
0x000019d0 <uhci_insert_td_frame_list+0>:       sub    $0x8,%esp
0x000019d3 <uhci_insert_td_frame_list+3>:       mov    0x14(%esp,1),%ecx
0x000019d7 <uhci_insert_td_frame_list+7>:       mov    %esi,0x4(%esp,1)
0x000019db <uhci_insert_td_frame_list+11>:      mov    0x10(%esp,1),%esi
0x000019df <uhci_insert_td_frame_list+15>:      and    $0x3ff,%ecx
<cut>

The "and" is there in both the patched and the original uhci-hcd.ko, just like in your test case.

But look at the unpatched uhci-hcd's assembler code of isochronous_find_start:

Dump of assembler code for function isochronous_find_start:
0x00002f10 <isochronous_find_start+0>:  sub    $0x20,%esp
0x00002f13 <isochronous_find_start+3>:  xor    %eax,%eax
0x00002f15 <isochronous_find_start+5>:  mov    %ebx,0x18(%esp,1)
0x00002f19 <isochronous_find_start+9>:  mov    0x28(%esp,1),%ebx
0x00002f1d <isochronous_find_start+13>: mov    %eax,0x10(%esp,1)
0x00002f21 <isochronous_find_start+17>: xor    %eax,%eax
0x00002f23 <isochronous_find_start+19>: mov    %esi,0x1c(%esp,1)
0x00002f27 <isochronous_find_start+23>: mov    0x24(%esp,1),%esi
0x00002f2b <isochronous_find_start+27>: mov    %eax,0x14(%esp,1)
0x00002f2f <isochronous_find_start+31>: cmpl   $0x384,0x48(%ebx)
0x00002f36 <isochronous_find_start+38>: jg     0x2fcb <isochronous_find_start+187>
0x00002f3c <isochronous_find_start+44>: mov    %ebx,0x4(%esp,1)
0x00002f40 <isochronous_find_start+48>: lea    0x14(%esp,1),%eax
0x00002f44 <isochronous_find_start+52>: mov    %eax,0xc(%esp,1)
0x00002f48 <isochronous_find_start+56>: lea    0x10(%esp,1),%eax
0x00002f4c <isochronous_find_start+60>: mov    %eax,0x8(%esp,1)
0x00002f50 <isochronous_find_start+64>: mov    %esi,(%esp,1)
0x00002f53 <isochronous_find_start+67>: call   0x2e70 <isochronous_find_limits>
0x00002f58 <isochronous_find_start+72>: testb  $0x2,0x28(%ebx)
0x00002f5c <isochronous_find_start+76>: je     0x2fb0 <isochronous_find_start+160>
0x00002f5e <isochronous_find_start+78>: test   %eax,%eax
0x00002f60 <isochronous_find_start+80>: jne    0x2f77 <isochronous_find_start+103>
0x00002f62 <isochronous_find_start+82>: mov    0x14(%esp,1),%eax
0x00002f66 <isochronous_find_start+86>: mov    %eax,0x44(%ebx)
0x00002f69 <isochronous_find_start+89>: xor    %eax,%eax
0x00002f6b <isochronous_find_start+91>: mov    0x18(%esp,1),%ebx
0x00002f6f <isochronous_find_start+95>: mov    0x1c(%esp,1),%esi
0x00002f73 <isochronous_find_start+99>: add    $0x20,%esp
0x00002f76 <isochronous_find_start+102>:        ret
0x00002f77 <isochronous_find_start+103>:        mov    %esi,(%esp,1)
0x00002f7a <isochronous_find_start+106>:        call   0x3930 <uhci_get_current_frame_number>
0x00002f7f <isochronous_find_start+111>:        cmp    $0xffffffff,%eax
0x00002f82 <isochronous_find_start+114>:        lea    0x3ff(%eax),%edx
0x00002f88 <isochronous_find_start+120>:        cmovg  %eax,%edx
0x00002f8b <isochronous_find_start+123>:        and    $0xfffffc00,%edx
0x00002f91 <isochronous_find_start+129>:        sub    %edx,%eax
0x00002f93 <isochronous_find_start+131>:        lea    0x409(%eax),%edx
0x00002f99 <isochronous_find_start+137>:        add    $0xa,%eax
0x00002f9c <isochronous_find_start+140>:        cmovns %eax,%edx
0x00002f9f <isochronous_find_start+143>:        and    $0xfffffc00,%edx
---Type <return> to continue, or q <return> to quit---
0x00002fa5 <isochronous_find_start+149>:        sub    %edx,%eax
0x00002fa7 <isochronous_find_start+151>:        jmp    0x2f66 <isochronous_find_start+86>
0x00002fa9 <isochronous_find_start+153>:        lea    0x0(%esi,1),%esi
0x00002fb0 <isochronous_find_start+160>:        mov    0x44(%ebx),%edx
0x00002fb3 <isochronous_find_start+163>:        cmp    $0xffffffff,%edx
0x00002fb6 <isochronous_find_start+166>:        lea    0x3ff(%edx),%eax
0x00002fbc <isochronous_find_start+172>:        cmovg  %edx,%eax
0x00002fbf <isochronous_find_start+175>:        and    $0xfffffc00,%eax
0x00002fc4 <isochronous_find_start+180>:        sub    %eax,%edx
0x00002fc6 <isochronous_find_start+182>:        mov    %edx,0x44(%ebx)
0x00002fc9 <isochronous_find_start+185>:        jmp    0x2f69 <isochronous_find_start+89>
0x00002fcb <isochronous_find_start+187>:        mov    $0xffffffe5,%eax
0x00002fd0 <isochronous_find_start+192>:        jmp    0x2f6b <isochronous_find_start+91>
0x00002fd2 <isochronous_find_start+194>:        lea    0x0(%esi,1),%esi
0x00002fd9 <isochronous_find_start+201>:        lea    0x0(%edi,1),%edi
End of assembler dump.


And compare it to the patched uhci-hcd's isochronous_find_start:

Dump of assembler code for function isochronous_find_start:
0x00002f10 <isochronous_find_start+0>:  sub    $0x20,%esp
0x00002f13 <isochronous_find_start+3>:  xor    %eax,%eax
0x00002f15 <isochronous_find_start+5>:  mov    %ebx,0x18(%esp,1)
0x00002f19 <isochronous_find_start+9>:  mov    0x28(%esp,1),%ebx
0x00002f1d <isochronous_find_start+13>: mov    %eax,0x10(%esp,1)
0x00002f21 <isochronous_find_start+17>: xor    %eax,%eax
0x00002f23 <isochronous_find_start+19>: mov    %esi,0x1c(%esp,1)
0x00002f27 <isochronous_find_start+23>: mov    0x24(%esp,1),%esi
0x00002f2b <isochronous_find_start+27>: mov    %eax,0x14(%esp,1)
0x00002f2f <isochronous_find_start+31>: cmpl   $0x384,0x48(%ebx)
0x00002f36 <isochronous_find_start+38>: jg     0x2f8e <isochronous_find_start+126>
0x00002f38 <isochronous_find_start+40>: mov    %ebx,0x4(%esp,1)
0x00002f3c <isochronous_find_start+44>: lea    0x14(%esp,1),%eax
0x00002f40 <isochronous_find_start+48>: mov    %eax,0xc(%esp,1)
0x00002f44 <isochronous_find_start+52>: lea    0x10(%esp,1),%eax
0x00002f48 <isochronous_find_start+56>: mov    %eax,0x8(%esp,1)
0x00002f4c <isochronous_find_start+60>: mov    %esi,(%esp,1)
0x00002f4f <isochronous_find_start+63>: call   0x2e70 <isochronous_find_limits>
0x00002f54 <isochronous_find_start+68>: testb  $0x2,0x28(%ebx)
0x00002f58 <isochronous_find_start+72>: je     0x2f85 <isochronous_find_start+117>
0x00002f5a <isochronous_find_start+74>: test   %eax,%eax
0x00002f5c <isochronous_find_start+76>: jne    0x2f73 <isochronous_find_start+99>
0x00002f5e <isochronous_find_start+78>: mov    0x14(%esp,1),%eax
0x00002f62 <isochronous_find_start+82>: mov    %eax,0x44(%ebx)
0x00002f65 <isochronous_find_start+85>: xor    %eax,%eax
0x00002f67 <isochronous_find_start+87>: mov    0x18(%esp,1),%ebx
0x00002f6b <isochronous_find_start+91>: mov    0x1c(%esp,1),%esi
0x00002f6f <isochronous_find_start+95>: add    $0x20,%esp
0x00002f72 <isochronous_find_start+98>: ret
0x00002f73 <isochronous_find_start+99>: mov    %esi,(%esp,1)
0x00002f76 <isochronous_find_start+102>:        call   0x38f0 <uhci_get_current_frame_number>
0x00002f7b <isochronous_find_start+107>:        add    $0xa,%eax
0x00002f7e <isochronous_find_start+110>:        and    $0x3ff,%eax
0x00002f83 <isochronous_find_start+115>:        jmp    0x2f62 <isochronous_find_start+82>
0x00002f85 <isochronous_find_start+117>:        andl   $0x3ff,0x44(%ebx)
0x00002f8c <isochronous_find_start+124>:        jmp    0x2f65 <isochronous_find_start+85>
0x00002f8e <isochronous_find_start+126>:        mov    $0xffffffe5,%eax
0x00002f93 <isochronous_find_start+131>:        jmp    0x2f67 <isochronous_find_start+87>
0x00002f95 <isochronous_find_start+133>:        lea    0x0(%esi,1),%esi
0x00002f99 <isochronous_find_start+137>:        lea    0x0(%edi,1),%edi
---Type <return> to continue, or q <return> to quit---
End of assembler dump.

You'll agree that the patched version's assembler code is better by 64 bytes, no?
So, up to gcc 3.3.2, we can't be sure that gcc optimizes "% PowerOf2" to "& (PowerOf2 - 1)".

Best regards,
Karsten




-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
_______________________________________________
[EMAIL PROTECTED]
To unsubscribe, use the last form field at:
https://lists.sourceforge.net/lists/listinfo/linux-usb-devel

Reply via email to