I hereby challenge you to provide portable C code that, when compiled with gcc, is faster than this assembly code. It should only take a few lines.
I bet my ass on it: You will fail! No matter what optimization you choose. You would also fail with msvc or Intel compiler. Regards, Timo Alex Ionescu wrote: > The version that GCC 4.4 and CL 15 will generate would be way more optimized > than this unportable/slower assembly code. > This isn't 1994 anymore. You can't beat the compiler anymore. > > Best regards, > Alex Ionescu > > > On Sun, Aug 2, 2009 at 3:31 PM, <[email protected]> wrote: > > >> Author: tkreuzer >> Date: Mon Aug 3 00:31:29 2009 >> New Revision: 42353 >> >> URL: http://svn.reactos.org/svn/reactos?rev=42353&view=rev >> Log: >> asm version of DIB_32BPP_ColorFill: >> - Add frame pointer >> - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned >> - Optimize the loop >> - Add comments >> >> Modified: >> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s >> >> Modified: >> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s >> URL: >> http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s?rev=42353&r1=42352&r2=42353&view=diff >> >> ============================================================================== >> --- trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s >> [iso-8859-1] (original) >> +++ trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s >> [iso-8859-1] Mon Aug 3 00:31:29 2009 >> @@ -4,78 +4,62 @@ >> * FILE: subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.c >> * PURPOSE: ASM optimised 32bpp ColorFill >> * PROGRAMMERS: Magnus Olsen >> + * Timo Kreuzer ([email protected]) >> */ >> >> - .globl _DIB_32BPP_ColorFill >> - .intel_syntax noprefix >> +.intel_syntax noprefix >> >> - .def _DIB_32BPP_ColorFill; >> - .scl 2; >> - .type 32; >> - .endef >> - >> - _DIB_32BPP_ColorFill: >> - sub esp, 24 >> - mov ecx, [esp+32] >> - mov [esp+8], ebx >> - mov ebx, [esp+28] >> - mov [esp+20], ebp >> - mov ebp, [esp+36] >> - mov [esp+12], esi >> - mov [esp+16], edi >> - mov edi, [ecx] >> - 
mov esi, [ecx+8] >> - mov edx, [ebx+36] >> - sub esi, edi >> - mov edi, [ecx+4] >> - mov eax, edi >> - imul eax, edx >> - add eax, [ebx+32] >> - mov ebx, [ecx] >> - lea eax, [eax+ebx*4] >> - mov [esp+4], eax >> - mov eax, [ecx+12] >> - cmp eax, edi >> - jbe end >> - sub eax, edi >> - mov [esp], eax >> - lea esi, [esi+0] >> +/* >> + * BOOLEAN >> + * _cdecl >> + * DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor); >> +*/ >> >> - for_loop: >> - mov eax, ebp >> - cld >> - mov ebx, esi >> - mov edi, [esp+4] >> - test edi, 3 >> - jnz algin_draw >> - mov ecx, esi >> - rep stosd >> - add [esp+4], edx >> - dec dword ptr [esp] >> - jnz for_loop >> - end: >> - mov ebx, [esp+8] >> - mov eax, 1 >> - mov esi, [esp+12] >> - mov edi, [esp+16] >> - mov ebp, [esp+20] >> - add esp, 24 >> - ret >> +.globl _DIB_32BPP_ColorFill >> +_DIB_32BPP_ColorFill: >> + push ebp >> + mov ebp, esp >> + push ebx >> + push esi >> + push edi >> + sub esp, 4 /* Space for lDelta */ >> >> - algin_draw: >> - stosd >> - dec ebx >> - mov ecx, ebx >> - rol eax, 16 >> - stosd >> - add [esp+4], edx >> - dec dword ptr [esp] >> - jnz for_loop >> + mov edx, [ebp+12] /* edx = prcl */ >> + mov ecx, [ebp+8] /* ecx = pso */ >> >> - mov ebx, [esp+8] >> - mov eax, 1 >> - mov esi, [esp+12] >> - mov edi, [esp+16] >> - mov ebp, [esp+20] >> - add esp, 24 >> - ret >> + mov ebx, [ecx+0x24] /* ebx = pso->lDelta; */ >> + mov [esp], ebx /* lDelta = pso->lDelta; */ >> + mov edi, [edx+4] /* edi = prcl->top; */ >> + mov eax, edi /* eax = prcl->top; */ >> + imul eax, ebx /* eax = prcl->top * pso->lDelta; */ >> + add eax, [ecx+0x20] /* eax += pso->pvScan0; */ >> + mov ebx, [edx] /* ebx = prcl->left; */ >> + lea esi, [eax+ebx*4] /* esi = pvLine0 = eax + 4 * prcl->left; >> */ >> + >> + mov ebx, [edx+8] /* ebx = prcl->right; */ >> + sub ebx, [edx] /* ebx = prcl->right - prcl->left; */ >> + jbe end /* if (ebx <= 0) goto end; */ >> + >> + mov edx, [edx+12] /* edx = prcl->bottom; */ >> + sub edx, edi /* edx -= prcl->top; */ >> + 
jbe end /* if (eax <= 0) goto end; */ >> + >> + mov eax, [ebp+16] /* eax = iColor; */ >> + cld >> + >> +for_loop: /* do { */ >> + mov edi, esi /* edi = pvLine0; */ >> + mov ecx, ebx /* ecx = cx; */ >> + rep stosd /* memset(pvLine0, iColor, cx); */ >> + add esi, [esp] /* pvLine0 += lDelta; */ >> + dec edx /* cy--; */ >> + jnz for_loop /* } while (cy > 0); */ >> + >> +end: >> + mov eax, 1 >> + add esp, 4 >> + pop edi >> + pop esi >> + pop ebx >> + pop ebp >> + ret >> >> >> >> > > > ------------------------------------------------------------------------ > > _______________________________________________ > Ros-dev mailing list > [email protected] > http://www.reactos.org/mailman/listinfo/ros-dev
_______________________________________________ Ros-dev mailing list [email protected] http://www.reactos.org/mailman/listinfo/ros-dev
