Am Freitag 15 Oktober 2004 14:41 schrieb Paulo Marques:
> karsten wiese wrote:
> > UHCI_NUMFRAMES will never change, it is a standard.
>
> Ok, then
>
> > I'll recheck with gcc 3.4 of fc3. I guess it'll stay the
> > same. Anyhow this is interrupt level, time critical code so
> > I think the patch is the right way to go.
>
> I just checked this code:
>
> unsigned int test(void)
> {
> return 10;
> }
>
> int main(int argc, char *argv[])
> {
> unsigned int a;
>
> a = test();
>
> printf("%d\n", a % 8);
>
> return 0;
> }
>
> The "test" function is just to not let the compiler optimize away
> everything because it knows the value of "a". (although, if it was a
> little smarter it could figure out even that :)
>
> With gcc 3.3.2 it produces this output:
>
> .type main, @function
> main:
> pushl %ebp
> movl %esp, %ebp
> pushl %ecx
> pushl %ecx
> andl $-16, %esp
> call test
> andl $7, %eax
> ^^^^^^^^^^^^^^^^
> pushl %edx
> pushl %edx
> pushl %eax
> pushl $.LC0
> call printf
> xorl %eax, %eax
> leave
> ret
>
> So it works as expected...
>
In uhci-hcd.ko, gcc 3.3.2 (I changed the Makefile to use gcc 3.3.2) shows one spot
where it optimizes by using the "and" operation, and two other spots where it fails
to use the "and" operation:
(gdb) disassemble uhci_insert_td_frame_list
Dump of assembler code for function uhci_insert_td_frame_list:
0x000019d0 <uhci_insert_td_frame_list+0>: sub $0x8,%esp
0x000019d3 <uhci_insert_td_frame_list+3>: mov 0x14(%esp,1),%ecx
0x000019d7 <uhci_insert_td_frame_list+7>: mov %esi,0x4(%esp,1)
0x000019db <uhci_insert_td_frame_list+11>: mov 0x10(%esp,1),%esi
0x000019df <uhci_insert_td_frame_list+15>: and $0x3ff,%ecx
<cut>
The "and" is there for both the patched and the original uhci-hcd.ko, like in your test case.
But look at the unpatched uhci-hcd's assembler code of isochronous_find_start:
Dump of assembler code for function isochronous_find_start:
0x00002f10 <isochronous_find_start+0>: sub $0x20,%esp
0x00002f13 <isochronous_find_start+3>: xor %eax,%eax
0x00002f15 <isochronous_find_start+5>: mov %ebx,0x18(%esp,1)
0x00002f19 <isochronous_find_start+9>: mov 0x28(%esp,1),%ebx
0x00002f1d <isochronous_find_start+13>: mov %eax,0x10(%esp,1)
0x00002f21 <isochronous_find_start+17>: xor %eax,%eax
0x00002f23 <isochronous_find_start+19>: mov %esi,0x1c(%esp,1)
0x00002f27 <isochronous_find_start+23>: mov 0x24(%esp,1),%esi
0x00002f2b <isochronous_find_start+27>: mov %eax,0x14(%esp,1)
0x00002f2f <isochronous_find_start+31>: cmpl $0x384,0x48(%ebx)
0x00002f36 <isochronous_find_start+38>: jg 0x2fcb <isochronous_find_start+187>
0x00002f3c <isochronous_find_start+44>: mov %ebx,0x4(%esp,1)
0x00002f40 <isochronous_find_start+48>: lea 0x14(%esp,1),%eax
0x00002f44 <isochronous_find_start+52>: mov %eax,0xc(%esp,1)
0x00002f48 <isochronous_find_start+56>: lea 0x10(%esp,1),%eax
0x00002f4c <isochronous_find_start+60>: mov %eax,0x8(%esp,1)
0x00002f50 <isochronous_find_start+64>: mov %esi,(%esp,1)
0x00002f53 <isochronous_find_start+67>: call 0x2e70 <isochronous_find_limits>
0x00002f58 <isochronous_find_start+72>: testb $0x2,0x28(%ebx)
0x00002f5c <isochronous_find_start+76>: je 0x2fb0 <isochronous_find_start+160>
0x00002f5e <isochronous_find_start+78>: test %eax,%eax
0x00002f60 <isochronous_find_start+80>: jne 0x2f77 <isochronous_find_start+103>
0x00002f62 <isochronous_find_start+82>: mov 0x14(%esp,1),%eax
0x00002f66 <isochronous_find_start+86>: mov %eax,0x44(%ebx)
0x00002f69 <isochronous_find_start+89>: xor %eax,%eax
0x00002f6b <isochronous_find_start+91>: mov 0x18(%esp,1),%ebx
0x00002f6f <isochronous_find_start+95>: mov 0x1c(%esp,1),%esi
0x00002f73 <isochronous_find_start+99>: add $0x20,%esp
0x00002f76 <isochronous_find_start+102>: ret
0x00002f77 <isochronous_find_start+103>: mov %esi,(%esp,1)
0x00002f7a <isochronous_find_start+106>: call 0x3930
<uhci_get_current_frame_number>
0x00002f7f <isochronous_find_start+111>: cmp $0xffffffff,%eax
0x00002f82 <isochronous_find_start+114>: lea 0x3ff(%eax),%edx
0x00002f88 <isochronous_find_start+120>: cmovg %eax,%edx
0x00002f8b <isochronous_find_start+123>: and $0xfffffc00,%edx
0x00002f91 <isochronous_find_start+129>: sub %edx,%eax
0x00002f93 <isochronous_find_start+131>: lea 0x409(%eax),%edx
0x00002f99 <isochronous_find_start+137>: add $0xa,%eax
0x00002f9c <isochronous_find_start+140>: cmovns %eax,%edx
0x00002f9f <isochronous_find_start+143>: and $0xfffffc00,%edx
---Type <return> to continue, or q <return> to quit---
0x00002fa5 <isochronous_find_start+149>: sub %edx,%eax
0x00002fa7 <isochronous_find_start+151>: jmp 0x2f66
<isochronous_find_start+86>
0x00002fa9 <isochronous_find_start+153>: lea 0x0(%esi,1),%esi
0x00002fb0 <isochronous_find_start+160>: mov 0x44(%ebx),%edx
0x00002fb3 <isochronous_find_start+163>: cmp $0xffffffff,%edx
0x00002fb6 <isochronous_find_start+166>: lea 0x3ff(%edx),%eax
0x00002fbc <isochronous_find_start+172>: cmovg %edx,%eax
0x00002fbf <isochronous_find_start+175>: and $0xfffffc00,%eax
0x00002fc4 <isochronous_find_start+180>: sub %eax,%edx
0x00002fc6 <isochronous_find_start+182>: mov %edx,0x44(%ebx)
0x00002fc9 <isochronous_find_start+185>: jmp 0x2f69
<isochronous_find_start+89>
0x00002fcb <isochronous_find_start+187>: mov $0xffffffe5,%eax
0x00002fd0 <isochronous_find_start+192>: jmp 0x2f6b
<isochronous_find_start+91>
0x00002fd2 <isochronous_find_start+194>: lea 0x0(%esi,1),%esi
0x00002fd9 <isochronous_find_start+201>: lea 0x0(%edi,1),%edi
End of assembler dump.
And compare it to the patched uhci-hcd's isochronous_find_start:
Dump of assembler code for function isochronous_find_start:
0x00002f10 <isochronous_find_start+0>: sub $0x20,%esp
0x00002f13 <isochronous_find_start+3>: xor %eax,%eax
0x00002f15 <isochronous_find_start+5>: mov %ebx,0x18(%esp,1)
0x00002f19 <isochronous_find_start+9>: mov 0x28(%esp,1),%ebx
0x00002f1d <isochronous_find_start+13>: mov %eax,0x10(%esp,1)
0x00002f21 <isochronous_find_start+17>: xor %eax,%eax
0x00002f23 <isochronous_find_start+19>: mov %esi,0x1c(%esp,1)
0x00002f27 <isochronous_find_start+23>: mov 0x24(%esp,1),%esi
0x00002f2b <isochronous_find_start+27>: mov %eax,0x14(%esp,1)
0x00002f2f <isochronous_find_start+31>: cmpl $0x384,0x48(%ebx)
0x00002f36 <isochronous_find_start+38>: jg 0x2f8e <isochronous_find_start+126>
0x00002f38 <isochronous_find_start+40>: mov %ebx,0x4(%esp,1)
0x00002f3c <isochronous_find_start+44>: lea 0x14(%esp,1),%eax
0x00002f40 <isochronous_find_start+48>: mov %eax,0xc(%esp,1)
0x00002f44 <isochronous_find_start+52>: lea 0x10(%esp,1),%eax
0x00002f48 <isochronous_find_start+56>: mov %eax,0x8(%esp,1)
0x00002f4c <isochronous_find_start+60>: mov %esi,(%esp,1)
0x00002f4f <isochronous_find_start+63>: call 0x2e70 <isochronous_find_limits>
0x00002f54 <isochronous_find_start+68>: testb $0x2,0x28(%ebx)
0x00002f58 <isochronous_find_start+72>: je 0x2f85 <isochronous_find_start+117>
0x00002f5a <isochronous_find_start+74>: test %eax,%eax
0x00002f5c <isochronous_find_start+76>: jne 0x2f73 <isochronous_find_start+99>
0x00002f5e <isochronous_find_start+78>: mov 0x14(%esp,1),%eax
0x00002f62 <isochronous_find_start+82>: mov %eax,0x44(%ebx)
0x00002f65 <isochronous_find_start+85>: xor %eax,%eax
0x00002f67 <isochronous_find_start+87>: mov 0x18(%esp,1),%ebx
0x00002f6b <isochronous_find_start+91>: mov 0x1c(%esp,1),%esi
0x00002f6f <isochronous_find_start+95>: add $0x20,%esp
0x00002f72 <isochronous_find_start+98>: ret
0x00002f73 <isochronous_find_start+99>: mov %esi,(%esp,1)
0x00002f76 <isochronous_find_start+102>: call 0x38f0
<uhci_get_current_frame_number>
0x00002f7b <isochronous_find_start+107>: add $0xa,%eax
0x00002f7e <isochronous_find_start+110>: and $0x3ff,%eax
0x00002f83 <isochronous_find_start+115>: jmp 0x2f62
<isochronous_find_start+82>
0x00002f85 <isochronous_find_start+117>: andl $0x3ff,0x44(%ebx)
0x00002f8c <isochronous_find_start+124>: jmp 0x2f65
<isochronous_find_start+85>
0x00002f8e <isochronous_find_start+126>: mov $0xffffffe5,%eax
0x00002f93 <isochronous_find_start+131>: jmp 0x2f67
<isochronous_find_start+87>
0x00002f95 <isochronous_find_start+133>: lea 0x0(%esi,1),%esi
0x00002f99 <isochronous_find_start+137>: lea 0x0(%edi,1),%edi
---Type <return> to continue, or q <return> to quit---
End of assembler dump.
You'll agree that the patched version's assembler code is better by 64 bytes, no?
So up to gcc 3.3.2 we can't be sure that gcc optimizes "% PowerOf2" to
"& (PowerOf2 - 1)".
Best regards,
Karsten
-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
_______________________________________________
[EMAIL PROTECTED]
To unsubscribe, use the last form field at:
https://lists.sourceforge.net/lists/listinfo/linux-usb-devel