Ben Dooks wrote:
> actually, thinking about it, we can probably get better code by doing:
> 
>       /* mop up any non-word aligned length reads. */
>       for (i = (len & ~3); i != len; i++)
>               ptr[i] = readb(info->regs + S3C2440_NFDATA);

It looks nicer but, surprisingly, it is one instruction longer and
three instructions slower in the normal case (i.e., when the length is
a multiple of four, so the byte loop runs zero times). See the two
listings below, both of which include the "buf" fix.

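Notation (trailing annotation on each instruction):
        ; <position of the instruction in the function>
        ;       +<running count of instructions executed after readsl
        ;         returns, when the byte loop runs zero times>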

- Werner

----- for (i = 0; i != (len & 3); i++) ------------------------------------

@ Compiler output for s3c2440_nand_read_buf with the tail loop written as
@ "for (i = 0; i != (len & 3); i++)".
@
@ Register roles as used below:
@   r0 = first argument on entry; later the address handed to __raw_readsl
@   r1 = second argument, passed unchanged to __raw_readsl (destination buffer)
@   r2 = third argument (len) on entry; then len / 4; then the loop index i
@   r4 = copy of len
@   r5 = r1 + (len & ~3), i.e. where the byte-wise tail starts
@   r6 = pointer loaded from [r0, #536]
@ NOTE(review): offsets 536, 96 and 16 presumably correspond to the driver's
@ info pointer, info->regs and S3C2440_NFDATA - confirm against the C source.
s3c2440_nand_read_buf:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 1, uses_anonymous_args = 0
        @ Prologue: build a frame and save r4-r6, which are used across the call.
        mov     ip, sp                                  ; 1
        stmfd   sp!, {r4, r5, r6, fp, ip, lr, pc}       ; 2
        sub     fp, ip, #4                              ; 3
        @ Reserve one extra word; the epilogue consumes it by loading it into r3.
        sub     sp, sp, #4                              ; 4
        ldr     r6, [r0, #536]                          ; 5
        @ Keep len in r4 so it is still available after the call.
        mov     r4, r2                                  ; 6
        ldr     r0, [r6, #96]                           ; 7
        @ r2 = len / 4 as a signed division: negative values get 3 added first
        @ so that the arithmetic shift rounds toward zero.
        cmp     r4, #0                                  ; 8
        add     r2, r2, #3                              ; 9
        movge   r2, r4                                  ; 10
        mov     r2, r2, asr #2                          ; 11
        @ r3 = len & ~3, computed before the call ...
        bic     r3, r4, #3                              ; 12
        @ r0 = loaded pointer + 16, the source address for the word reads.
        add     r0, r0, #16                             ; 13
        @ ... so the tail pointer r5 = r1 + (len & ~3) is ready before the call
        @ and nothing needs to be recomputed afterwards.
        add     r5, r1, r3                              ; 14
        @ Call with r0 = source address, r1 = buffer, r2 = len / 4.
        bl      __raw_readsl                            ; 15
        @ i = 0, then jump to the loop test at the bottom.
        mov     r2, #0                                  ; 16    +1
        b       .L212                                   ; 17    +2
.L213:
        @ Loop body: reload the pointer from [r6, #96] on every pass, read one
        @ byte from offset 16 and store it at r5[i]; then i++.
        ldr     r3, [r6, #96]                           ; 18
        ldrb    r3, [r3, #16]   @ zero_extendqisi2      ; 19
        strb    r3, [r5, r2]                            ; 20
        add     r2, r2, #1                              ; 21
.L212:
        @ Loop test: continue while i != (len & 3); the mask is recomputed on
        @ every pass.
        and     r3, r4, #3                              ; 22    +3
        cmp     r2, r3                                  ; 23    +4
        bne     .L213                                   ; 24    +5
        @ Epilogue: r3 absorbs the reserved word, r4-r6/fp/sp are restored and
        @ the saved lr is loaded straight into pc to return.
        ldmfd   sp, {r3, r4, r5, r6, fp, sp, pc}        ; 25    +6

----- for (i = (len & ~3); i != len; i++) ---------------------------------

@ Compiler output for s3c2440_nand_read_buf with the tail loop written as
@ "for (i = (len & ~3); i != len; i++)".
@
@ Register roles as used below:
@   r0 = first argument on entry; later the address handed to __raw_readsl
@   r1 = second argument (destination buffer) until the call; afterwards
@        reused to hold len & ~3
@   r2 = third argument (len) on entry; then len / 4; then a counter from 0
@   r4 = copy of the buffer pointer, advanced to the tail after the call
@   r5 = copy of len
@   r6 = pointer loaded from [r0, #536]
@ NOTE(review): offsets 536, 96 and 16 presumably correspond to the driver's
@ info pointer, info->regs and S3C2440_NFDATA - confirm against the C source.
s3c2440_nand_read_buf:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 1, uses_anonymous_args = 0
        @ Prologue: build a frame and save r4-r6, which are used across the call.
        mov     ip, sp                                  ; 1
        stmfd   sp!, {r4, r5, r6, fp, ip, lr, pc}       ; 2
        sub     fp, ip, #4                              ; 3
        @ Reserve one extra word; the epilogue consumes it by loading it into r3.
        sub     sp, sp, #4                              ; 4
        ldr     r6, [r0, #536]                          ; 5
        @ Keep len in r5 so it is still available after the call.
        mov     r5, r2                                  ; 6
        ldr     r0, [r6, #96]                           ; 7
        @ r2 = len / 4 as a signed division: negative values get 3 added first
        @ so that the arithmetic shift rounds toward zero.
        cmp     r5, #0                                  ; 8
        add     r2, r2, #3                              ; 9
        movge   r2, r5                                  ; 10
        mov     r2, r2, asr #2                          ; 11
        @ r0 = loaded pointer + 16, the source address for the word reads.
        add     r0, r0, #16                             ; 12
        @ Only the plain buffer pointer is saved here; the tail offset is not
        @ computed until after the call.
        mov     r4, r1                                  ; 13
        @ Call with r0 = source address, r1 = buffer, r2 = len / 4.
        bl      __raw_readsl                            ; 14
        @ After the call: r1 = len & ~3, r4 = buffer + (len & ~3), counter = 0.
        @ These are the extra post-call instructions compared with the other
        @ variant, which did the equivalent work before the call.
        bic     r1, r5, #3                              ; 15    +1
        add     r4, r4, r1                              ; 16    +2
        mov     r2, #0                                  ; 17    +3
        b       .L212                                   ; 18    +4
.L213:
        @ Loop body: reload the pointer from [r6, #96] on every pass, read one
        @ byte from offset 16 and store it through r4 with post-increment.
        ldr     r3, [r6, #96]                           ; 19
        ldrb    r3, [r3, #16]   @ zero_extendqisi2      ; 20
        strb    r3, [r4], #1                            ; 21
.L212:
        @ Loop test: r3 = len - (len & ~3), recomputed on every pass; continue
        @ while the counter differs from it. The add sits between cmp and bne
        @ but has no "s" suffix, so it leaves the flags from cmp intact.
        rsb     r3, r1, r5                              ; 22    +5
        cmp     r2, r3                                  ; 23    +6
        add     r2, r2, #1                              ; 24    +7
        bne     .L213                                   ; 25    +8
        @ Epilogue: r3 absorbs the reserved word, r4-r6/fp/sp are restored and
        @ the saved lr is loaded straight into pc to return.
        ldmfd   sp, {r3, r4, r5, r6, fp, sp, pc}        ; 26    +9


Reply via email to