Am Freitag 15 Oktober 2004 14:41 schrieb Paulo Marques: > karsten wiese wrote: > > UHCI_NUMFRAMES will never change, it is a standard. > > Ok, then > > > I'll recheck with gcc 3.4 of fc3. I guess it'll stay the > > same. Anyhow this is interrupt level, time critical code so > > I think the patch is the right way to go. > > I just checked this code: > > unsigned int test(void) > { > return 10; > } > > int main(int argc, char *argv[]) > { > unsigned int a; > > a = test(); > > printf("%d\n", a % 8); > > return 0; > } > > The "test" function is just to not let the compiler optimize away > everything because it knows the value of "a". (although, if it were a > little smarter it could figure out even that :) > > With gcc 3.3.2 it produces this output: > > .type main, @function > main: > pushl %ebp > movl %esp, %ebp > pushl %ecx > pushl %ecx > andl $-16, %esp > call test > andl $7, %eax > ^^^^^^^^^^^^^^^^ > pushl %edx > pushl %edx > pushl %eax > pushl $.LC0 > call printf > xorl %eax, %eax > leave > ret > > So it works as expected... > In uhci-hcd.ko, gcc 3.3.2 (I changed the Makefile to use gcc 3.3.2) shows one spot where it optimizes by using the "and" operation and two other spots where it fails to use the "and" operation:
(gdb) disassemble uhci_insert_td_frame_list Dump of assembler code for function uhci_insert_td_frame_list: 0x000019d0 <uhci_insert_td_frame_list+0>: sub $0x8,%esp 0x000019d3 <uhci_insert_td_frame_list+3>: mov 0x14(%esp,1),%ecx 0x000019d7 <uhci_insert_td_frame_list+7>: mov %esi,0x4(%esp,1) 0x000019db <uhci_insert_td_frame_list+11>: mov 0x10(%esp,1),%esi 0x000019df <uhci_insert_td_frame_list+15>: and $0x3ff,%ecx <cut> the "and" is here for the patched and the original uhci-hcd.ko. like in your test case. But look at the unpatched uhci-hcd's assembler code of isochronous_find_start: Dump of assembler code for function isochronous_find_start: 0x00002f10 <isochronous_find_start+0>: sub $0x20,%esp 0x00002f13 <isochronous_find_start+3>: xor %eax,%eax 0x00002f15 <isochronous_find_start+5>: mov %ebx,0x18(%esp,1) 0x00002f19 <isochronous_find_start+9>: mov 0x28(%esp,1),%ebx 0x00002f1d <isochronous_find_start+13>: mov %eax,0x10(%esp,1) 0x00002f21 <isochronous_find_start+17>: xor %eax,%eax 0x00002f23 <isochronous_find_start+19>: mov %esi,0x1c(%esp,1) 0x00002f27 <isochronous_find_start+23>: mov 0x24(%esp,1),%esi 0x00002f2b <isochronous_find_start+27>: mov %eax,0x14(%esp,1) 0x00002f2f <isochronous_find_start+31>: cmpl $0x384,0x48(%ebx) 0x00002f36 <isochronous_find_start+38>: jg 0x2fcb <isochronous_find_start+187> 0x00002f3c <isochronous_find_start+44>: mov %ebx,0x4(%esp,1) 0x00002f40 <isochronous_find_start+48>: lea 0x14(%esp,1),%eax 0x00002f44 <isochronous_find_start+52>: mov %eax,0xc(%esp,1) 0x00002f48 <isochronous_find_start+56>: lea 0x10(%esp,1),%eax 0x00002f4c <isochronous_find_start+60>: mov %eax,0x8(%esp,1) 0x00002f50 <isochronous_find_start+64>: mov %esi,(%esp,1) 0x00002f53 <isochronous_find_start+67>: call 0x2e70 <isochronous_find_limits> 0x00002f58 <isochronous_find_start+72>: testb $0x2,0x28(%ebx) 0x00002f5c <isochronous_find_start+76>: je 0x2fb0 <isochronous_find_start+160> 0x00002f5e <isochronous_find_start+78>: test %eax,%eax 0x00002f60 <isochronous_find_start+80>: 
jne 0x2f77 <isochronous_find_start+103> 0x00002f62 <isochronous_find_start+82>: mov 0x14(%esp,1),%eax 0x00002f66 <isochronous_find_start+86>: mov %eax,0x44(%ebx) 0x00002f69 <isochronous_find_start+89>: xor %eax,%eax 0x00002f6b <isochronous_find_start+91>: mov 0x18(%esp,1),%ebx 0x00002f6f <isochronous_find_start+95>: mov 0x1c(%esp,1),%esi 0x00002f73 <isochronous_find_start+99>: add $0x20,%esp 0x00002f76 <isochronous_find_start+102>: ret 0x00002f77 <isochronous_find_start+103>: mov %esi,(%esp,1) 0x00002f7a <isochronous_find_start+106>: call 0x3930 <uhci_get_current_frame_number> 0x00002f7f <isochronous_find_start+111>: cmp $0xffffffff,%eax 0x00002f82 <isochronous_find_start+114>: lea 0x3ff(%eax),%edx 0x00002f88 <isochronous_find_start+120>: cmovg %eax,%edx 0x00002f8b <isochronous_find_start+123>: and $0xfffffc00,%edx 0x00002f91 <isochronous_find_start+129>: sub %edx,%eax 0x00002f93 <isochronous_find_start+131>: lea 0x409(%eax),%edx 0x00002f99 <isochronous_find_start+137>: add $0xa,%eax 0x00002f9c <isochronous_find_start+140>: cmovns %eax,%edx 0x00002f9f <isochronous_find_start+143>: and $0xfffffc00,%edx ---Type <return> to continue, or q <return> to quit--- 0x00002fa5 <isochronous_find_start+149>: sub %edx,%eax 0x00002fa7 <isochronous_find_start+151>: jmp 0x2f66 <isochronous_find_start+86> 0x00002fa9 <isochronous_find_start+153>: lea 0x0(%esi,1),%esi 0x00002fb0 <isochronous_find_start+160>: mov 0x44(%ebx),%edx 0x00002fb3 <isochronous_find_start+163>: cmp $0xffffffff,%edx 0x00002fb6 <isochronous_find_start+166>: lea 0x3ff(%edx),%eax 0x00002fbc <isochronous_find_start+172>: cmovg %edx,%eax 0x00002fbf <isochronous_find_start+175>: and $0xfffffc00,%eax 0x00002fc4 <isochronous_find_start+180>: sub %eax,%edx 0x00002fc6 <isochronous_find_start+182>: mov %edx,0x44(%ebx) 0x00002fc9 <isochronous_find_start+185>: jmp 0x2f69 <isochronous_find_start+89> 0x00002fcb <isochronous_find_start+187>: mov $0xffffffe5,%eax 0x00002fd0 <isochronous_find_start+192>: jmp 0x2f6b 
<isochronous_find_start+91> 0x00002fd2 <isochronous_find_start+194>: lea 0x0(%esi,1),%esi 0x00002fd9 <isochronous_find_start+201>: lea 0x0(%edi,1),%edi End of assembler dump. And compare it to the patched uhci-hcd's isochronous_find_start: Dump of assembler code for function isochronous_find_start: 0x00002f10 <isochronous_find_start+0>: sub $0x20,%esp 0x00002f13 <isochronous_find_start+3>: xor %eax,%eax 0x00002f15 <isochronous_find_start+5>: mov %ebx,0x18(%esp,1) 0x00002f19 <isochronous_find_start+9>: mov 0x28(%esp,1),%ebx 0x00002f1d <isochronous_find_start+13>: mov %eax,0x10(%esp,1) 0x00002f21 <isochronous_find_start+17>: xor %eax,%eax 0x00002f23 <isochronous_find_start+19>: mov %esi,0x1c(%esp,1) 0x00002f27 <isochronous_find_start+23>: mov 0x24(%esp,1),%esi 0x00002f2b <isochronous_find_start+27>: mov %eax,0x14(%esp,1) 0x00002f2f <isochronous_find_start+31>: cmpl $0x384,0x48(%ebx) 0x00002f36 <isochronous_find_start+38>: jg 0x2f8e <isochronous_find_start+126> 0x00002f38 <isochronous_find_start+40>: mov %ebx,0x4(%esp,1) 0x00002f3c <isochronous_find_start+44>: lea 0x14(%esp,1),%eax 0x00002f40 <isochronous_find_start+48>: mov %eax,0xc(%esp,1) 0x00002f44 <isochronous_find_start+52>: lea 0x10(%esp,1),%eax 0x00002f48 <isochronous_find_start+56>: mov %eax,0x8(%esp,1) 0x00002f4c <isochronous_find_start+60>: mov %esi,(%esp,1) 0x00002f4f <isochronous_find_start+63>: call 0x2e70 <isochronous_find_limits> 0x00002f54 <isochronous_find_start+68>: testb $0x2,0x28(%ebx) 0x00002f58 <isochronous_find_start+72>: je 0x2f85 <isochronous_find_start+117> 0x00002f5a <isochronous_find_start+74>: test %eax,%eax 0x00002f5c <isochronous_find_start+76>: jne 0x2f73 <isochronous_find_start+99> 0x00002f5e <isochronous_find_start+78>: mov 0x14(%esp,1),%eax 0x00002f62 <isochronous_find_start+82>: mov %eax,0x44(%ebx) 0x00002f65 <isochronous_find_start+85>: xor %eax,%eax 0x00002f67 <isochronous_find_start+87>: mov 0x18(%esp,1),%ebx 0x00002f6b <isochronous_find_start+91>: mov 0x1c(%esp,1),%esi 
0x00002f6f <isochronous_find_start+95>: add $0x20,%esp 0x00002f72 <isochronous_find_start+98>: ret 0x00002f73 <isochronous_find_start+99>: mov %esi,(%esp,1) 0x00002f76 <isochronous_find_start+102>: call 0x38f0 <uhci_get_current_frame_number> 0x00002f7b <isochronous_find_start+107>: add $0xa,%eax 0x00002f7e <isochronous_find_start+110>: and $0x3ff,%eax 0x00002f83 <isochronous_find_start+115>: jmp 0x2f62 <isochronous_find_start+82> 0x00002f85 <isochronous_find_start+117>: andl $0x3ff,0x44(%ebx) 0x00002f8c <isochronous_find_start+124>: jmp 0x2f65 <isochronous_find_start+85> 0x00002f8e <isochronous_find_start+126>: mov $0xffffffe5,%eax 0x00002f93 <isochronous_find_start+131>: jmp 0x2f67 <isochronous_find_start+87> 0x00002f95 <isochronous_find_start+133>: lea 0x0(%esi,1),%esi 0x00002f99 <isochronous_find_start+137>: lea 0x0(%edi,1),%edi ---Type <return> to continue, or q <return> to quit--- End of assembler dump. You'll agree that the patched version's assembler code is better by 64 bytes, no? So up to gcc 3.3.2 we can't be sure that gcc optimizes "% PowerOf2" to "& (PowerOf2 - 1)". Best regards, Karsten ------------------------------------------------------- This SF.net email is sponsored by: IT Product Guide on ITManagersJournal Use IT products in your business? Tell us what you think of them. Give us Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more http://productguide.itmanagersjournal.com/guidepromo.tmpl _______________________________________________ [EMAIL PROTECTED] To unsubscribe, use the last form field at: https://lists.sourceforge.net/lists/listinfo/linux-usb-devel