On Wednesday, 28 November 2012 at 00:19:33 UTC, bearophile wrote:
dpaste is currently down and I don't have gdc2/ldc2 installed here, so I can't show the asm from those other compilers.

fwiw, here is what LDC produces:

---
# In-place reversal of an array of 4-byte elements using two pointers that
# walk towards each other.
# Register use visible below: RSI is the base address, RDI an element count
# scaled by 4 (presumably the int[] slice's pointer and length -- confirm
# against the D calling convention). Scratch: RAX, ECX, EDX.
_D4test18__T10reverseArrTiZ10reverseArrFAiZv:
        lea     RAX, QWORD PTR [RSI + 4*RDI - 4]    # RAX = address of the last element
        cmp     RSI, RAX
        jae     .LBB1_3                             # front >= back (unsigned): nothing to swap
        lea     RAX, QWORD PTR [RSI + 4*RDI - 8]    # back pointer biased by -4; the loop reads [RAX + 4]
        .align  16, 0x90                            # pad with 0x90 bytes so the loop head is 16-byte aligned
.LBB1_2:
        # Swap the element at the front (RSI) with the one at the back (RAX + 4).
        mov     ECX, DWORD PTR [RSI]
        mov     EDX, DWORD PTR [RAX + 4]
        mov     DWORD PTR [RSI], EDX
        mov     DWORD PTR [RAX + 4], ECX
        add     RSI, 4                              # advance front by one element
        cmp     RSI, RAX                            # RAX (old back - 4) is already the new back address
        lea     RAX, QWORD PTR [RAX - 4]            # re-bias back pointer; lea leaves the cmp flags intact
        jb      .LBB1_2                             # repeat while front < back (unsigned)
.LBB1_3:
        ret
---

---
# In-place reversal of an array of 4-byte elements, driven by a remaining-
# element counter rather than a pointer comparison.
# Register use visible below: RSI is the base address, RDI the element count
# (presumably the int[] slice's pointer and length -- confirm against the D
# calling convention). Scratch: RAX, ECX, EDX.
# Invariant at .LBB2_2: RAX == RSI + 4*RDI - 4, i.e. RAX addresses the last
# of the RDI elements that start at RSI.
_D3std9algorithm15__T7reverseTAiZ7reverseFAiZv:
        test    RDI, RDI
        je      .LBB2_4                             # zero elements: return immediately
        lea     RAX, QWORD PTR [RSI + 4*RDI - 4]    # RAX = address of the last element
        .align  16, 0x90                            # pad with 0x90 bytes so the loop head is 16-byte aligned
.LBB2_2:
        # Swap the element at the front (RSI) with the one at the back (RAX).
        mov     ECX, DWORD PTR [RSI]
        mov     EDX, DWORD PTR [RAX]
        mov     DWORD PTR [RSI], EDX
        mov     DWORD PTR [RAX], ECX
        cmp     RDI, 1
        je      .LBB2_4                             # one element was left (RSI == RAX, swapped with itself): done
        add     RAX, -4                             # move back pointer down one element
        add     RSI, 4                              # move front pointer up one element
        add     RDI, -2                             # two elements handled; sets ZF when none remain
        jne     .LBB2_2                             # loop while the remaining count is non-zero
.LBB2_4:
        ret
---

---
# In-place reversal of an array of 4-byte elements using two pointers that
# walk towards each other.
# Register use visible below: RSI is the base address, RDI an element count
# scaled by 4 (presumably the int[] slice's pointer and length -- confirm
# against the D calling convention). Scratch: RAX, ECX, EDX.
_D4test20__T12reverseArrayTiZ12reverseArrayFAiZv:
        lea     RAX, QWORD PTR [RSI + 4*RDI - 4]    # RAX = address of the last element
        cmp     RSI, RAX
        jae     .LBB3_3                             # front >= back (unsigned): nothing to swap
        add     RSI, 4                              # front pointer biased by +4; the loop reads [RSI - 4]
        .align  16, 0x90                            # pad with 0x90 bytes so the loop head is 16-byte aligned
.LBB3_2:
        # Swap the element at the front (RSI - 4) with the one at the back (RAX).
        mov     ECX, DWORD PTR [RSI - 4]
        mov     EDX, DWORD PTR [RAX]
        mov     DWORD PTR [RSI - 4], EDX
        mov     DWORD PTR [RAX], ECX
        add     RAX, -4                             # move back pointer down one element
        cmp     RSI, RAX                            # RSI (old front + 4) is already the new front address
        lea     RSI, QWORD PTR [RSI + 4]            # re-bias front pointer; lea leaves the cmp flags intact
        jb      .LBB3_2                             # repeat while front < back (unsigned)
.LBB3_3:
        ret
---

The extra jump in the std.algorithm version is still there – I haven't checked whether it would be feasible to optimize the induction variable away entirely.

David

Reply via email to