http://d.puremagic.com/issues/show_bug.cgi?id=2750

           Summary: Optimize slice copy with size known at compile time
           Product: D
           Version: 1.041
          Platform: PC
        OS/Version: Windows
            Status: NEW
          Keywords: patch, wrong-code
          Severity: normal
          Priority: P2
         Component: DMD
        AssignedTo: bugzi...@digitalmars.com
        ReportedBy: snake.sc...@gmail.com


It was discussed recently that the compiler intrinsic for slice copying was
slower than CRT memcpy():

http://tinyurl.com/cfxmva

In that particular case the compiler generated rep movsb even though the
slice size was known at compile time.

I'm proposing a patch which fixes this problem.  Here is an example.  This
code:

void main() {
    auto h = "hello\n";
    char buf[16];
    buf[0 .. h.length] = h;
}

is compiled with -O -release -inline by the current DMD 1.041 into:

__Dmain comdat
        assume  CS:__Dmain
                sub     ESP,020h
                mov     EDX,FLAT:_DATA[0Ch]
                mov     EAX,FLAT:_DATA[08h]
                push    EBX
                push    ESI
                mov     ESI,EDX
                push    EDI
                lea     EDI,0Ch[ESP]
                movsd
                movsb
                movsb
                lea     ECX,01Ch[ESP]
                mov     EBX,0FFFFFFFFh
                mov     [ECX],EBX
                mov     EAX,6
                lea     ESI,0Ch[ESP]
                mov     4[ECX],EBX
                lea     EDI,01Ch[ESP]
                mov     8[ECX],EBX
                mov     0Ch[ECX],EBX
                mov     ECX,EAX
                rep
                movsb
                xor     EAX,EAX
                pop     EDI
                pop     ESI
                pop     EBX
                add     ESP,020h
                ret
__Dmain ends

and by a patched compiler (note that the rep movsb sequence is gone):

__Dmain comdat
        assume  CS:__Dmain
                sub     ESP,020h
                mov     EDX,FLAT:_DATA[0Ch]
                mov     EAX,FLAT:_DATA[08h]
                push    EBX
                push    ESI
                mov     ESI,EDX
                push    EDI
                lea     EDI,0Ch[ESP]
                movsd
                movsb
                movsb
                lea     ECX,01Ch[ESP]
                mov     EBX,0FFFFFFFFh
                mov     [ECX],EBX
                xor     EAX,EAX
                mov     4[ECX],EBX
                mov     8[ECX],EBX
                mov     0Ch[ECX],EBX
                pop     EDI
                pop     ESI
                pop     EBX
                add     ESP,020h
                ret
__Dmain ends

Here is the patch:

-------8<------------------------------
diff --git a/dmd/backend/cgelem.c b/dmd/backend/cgelem.c
index a2a4a1f..a80eefb 100644
--- a/dmd/backend/cgelem.c
+++ b/dmd/backend/cgelem.c
@@ -3773,6 +3773,16 @@ STATIC elem * el64_32(elem *e)
            e->E1 = el_selecte1(e->E1);
        }
        break;
+
+    case OPpair:
+       e = el_selecte1(el_selecte1(e));
+       goto L1;
+    case OPrpair:
+       e = el_selecte2(el_selecte1(e));
+       goto L1;
+    L1:
+       e->Ety = ty;
+       break;
   }
   return e;
 }
-------8<------------------------------


-- 

Reply via email to