Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=41f9412b206985a36145b423f58bf8b46085358e
Commit:     41f9412b206985a36145b423f58bf8b46085358e
Parent:     7674464cb31ff652d2eda69783ef61640eae4c3c
Author:     Jesper Nilsson <[EMAIL PROTECTED]>
AuthorDate: Fri Jan 25 17:54:14 2008 +0100
Committer:  Jesper Nilsson <[EMAIL PROTECTED]>
CommitDate: Fri Feb 8 11:06:35 2008 +0100

    CRIS v32: Update lib/checksum.S and lib/checksumcopy.S
    
    - Slight tweaks, use $acr + addoq to propagate carry across the loop boundary.
    - Better use of latency cycles.
    - Remove duplicate folding of carry, it is not needed.
---
 arch/cris/arch-v32/lib/checksum.S     |   72 ++++++++++----------------------
 arch/cris/arch-v32/lib/checksumcopy.S |   69 +++++++++----------------------
 2 files changed, 43 insertions(+), 98 deletions(-)

diff --git a/arch/cris/arch-v32/lib/checksum.S b/arch/cris/arch-v32/lib/checksum.S
index 32e6618..87f3fd7 100644
--- a/arch/cris/arch-v32/lib/checksum.S
+++ b/arch/cris/arch-v32/lib/checksum.S
@@ -1,6 +1,6 @@
 /*
  * A fast checksum routine using movem
- * Copyright (c) 1998-2001, 2003 Axis Communications AB
+ * Copyright (c) 1998-2007 Axis Communications AB
  *
  * csum_partial(const unsigned char * buff, int len, unsigned int sum)
  */
@@ -12,30 +12,23 @@ csum_partial:
        ;; r11 - length
        ;; r12 - checksum
 
-       ;; check for breakeven length between movem and normal word looping versions
-       ;; we also do _NOT_ want to compute a checksum over more than the
-       ;; actual length when length < 40
-
-       cmpu.w  80,$r11
-       blo     _word_loop
-       nop
-
-       ;; need to save the registers we use below in the movem loop
-       ;; this overhead is why we have a check above for breakeven length
-       ;; only r0 - r8 have to be saved, the other ones are clobber-able
-       ;; according to the ABI
+       ;; Optimized for large packets
+       subq    10*4, $r11
+       blt     _word_loop
+       move.d  $r11, $acr
 
        subq    9*4,$sp
-       subq    10*4,$r11       ; update length for the first loop
+       clearf  c
        movem   $r8,[$sp]
 
        ;; do a movem checksum
 
 _mloop:        movem   [$r10+],$r9     ; read 10 longwords
-
+       ;; Loop count without touching the c flag.
+       addoq   -10*4, $acr, $acr
        ;; perform dword checksumming on the 10 longwords
 
-       add.d   $r0,$r12
+       addc    $r0,$r12
        addc    $r1,$r12
        addc    $r2,$r12
        addc    $r3,$r12
@@ -46,60 +39,41 @@ _mloop:     movem   [$r10+],$r9     ; read 10 longwords
        addc    $r8,$r12
        addc    $r9,$r12
 
-       ;; fold the carry into the checksum, to avoid having to loop the carry
-       ;; back into the top
-
-       addc    0,$r12
-       addc    0,$r12          ; do it again, since we might have generated a carry
-
-       subq    10*4,$r11
-       bge     _mloop
-       nop
-
-       addq    10*4,$r11       ; compensate for last loop underflowing length
+       ;; test $acr without trashing carry.
+       move.d  $acr, $acr
+       bpl     _mloop
+       ;; r11 <= acr  is not really needed in the mloop, just using the dslot
+       ;; to prepare for what is needed after mloop.
+       move.d  $acr, $r11
 
+       ;; fold the last carry into r13
+       addc    0, $r12
        movem   [$sp+],$r8      ; restore regs
 
 _word_loop:
-       ;; only fold if there is anything to fold.
-
-       cmpq    0,$r12
-       beq     _no_fold
-
-       ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
-       ;; r9 and r13 can be used as temporaries.
+       addq    10*4,$r11       ; compensate for last loop underflowing length
 
        moveq   -1,$r9          ; put 0xffff in r9, faster than move.d 0xffff,r9
        lsrq    16,$r9
 
        move.d  $r12,$r13
        lsrq    16,$r13         ; r13 = checksum >> 16
-       and.d   $r9,$r12                ; checksum = checksum & 0xffff
-       add.d   $r13,$r12               ; checksum += r13
-       move.d  $r12,$r13               ; do the same again, maybe we got a carry last add
-       lsrq    16,$r13
-       and.d   $r9,$r12
-       add.d   $r13,$r12
+       and.d   $r9,$r12        ; checksum = checksum & 0xffff
 
 _no_fold:
-       cmpq    2,$r11
+       subq    2,$r11
        blt     _no_words
-       nop
+       add.d   $r13,$r12       ; checksum += r13
 
        ;; checksum the rest of the words
-
-       subq    2,$r11
-
 _wloop:        subq    2,$r11
        bge     _wloop
        addu.w  [$r10+],$r12
 
-       addq    2,$r11
-
 _no_words:
+       addq    2,$r11
        ;; see if we have one odd byte more
-       cmpq    1,$r11
-       beq     _do_byte
+       bne     _do_byte
        nop
        ret
        move.d  $r12,$r10
diff --git a/arch/cris/arch-v32/lib/checksumcopy.S b/arch/cris/arch-v32/lib/checksumcopy.S
index 9303ccb..21aabe9 100644
--- a/arch/cris/arch-v32/lib/checksumcopy.S
+++ b/arch/cris/arch-v32/lib/checksumcopy.S
@@ -1,6 +1,6 @@
 /*
  * A fast checksum+copy routine using movem
- * Copyright (c) 1998, 2001, 2003 Axis Communications AB
+ * Copyright (c) 1998-2007 Axis Communications AB
  *
  * Authors:    Bjorn Wesen
  *
@@ -16,32 +16,23 @@ csum_partial_copy_nocheck:
        ;; r12 - length
        ;; r13 - checksum
 
-       ;; check for breakeven length between movem and normal word looping versions
-       ;; we also do _NOT_ want to compute a checksum over more than the
-       ;; actual length when length < 40
-
-       cmpu.w  80,$r12
-       blo     _word_loop
-       nop
-
-       ;; need to save the registers we use below in the movem loop
-       ;; this overhead is why we have a check above for breakeven length
-       ;; only r0 - r8 have to be saved, the other ones are clobber-able
-       ;; according to the ABI
+       ;; Optimized for large packets
+       subq    10*4, $r12
+       blt     _word_loop
+       move.d  $r12, $acr
 
        subq    9*4,$sp
-       subq    10*4,$r12       ; update length for the first loop
+       clearf  c
        movem   $r8,[$sp]
 
        ;; do a movem copy and checksum
-
 1:     ;; A failing userspace access (the read) will have this as PC.
 _mloop:        movem   [$r10+],$r9     ; read 10 longwords
+       addoq   -10*4, $acr, $acr ; loop counter in latency cycle
        movem   $r9,[$r11+]     ; write 10 longwords
 
        ;; perform dword checksumming on the 10 longwords
-
-       add.d   $r0,$r13
+       addc    $r0,$r13
        addc    $r1,$r13
        addc    $r2,$r13
        addc    $r3,$r13
@@ -52,47 +43,30 @@ _mloop:     movem   [$r10+],$r9     ; read 10 longwords
        addc    $r8,$r13
        addc    $r9,$r13
 
-       ;; fold the carry into the checksum, to avoid having to loop the carry
-       ;; back into the top
-
-       addc    0,$r13
-       addc    0,$r13          ; do it again, since we might have generated a carry
-
-       subq    10*4,$r12
-       bge     _mloop
-       nop
-
-       addq    10*4,$r12       ; compensate for last loop underflowing length
+       ;; test $acr, without trashing carry.
+       move.d  $acr, $acr
+       bpl     _mloop
+       ;; r12 <= acr  is needed after mloop and in the exception handlers.
+       move.d  $acr, $r12
 
+       ;; fold the last carry into r13
+       addc    0, $r13
        movem   [$sp+],$r8      ; restore regs
 
 _word_loop:
-       ;; only fold if there is anything to fold.
-
-       cmpq    0,$r13
-       beq     _no_fold
+       addq    10*4,$r12       ; compensate for last loop underflowing length
 
        ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
        ;; r9 can be used as temporary.
-
        move.d  $r13,$r9
        lsrq    16,$r9          ; r0 = checksum >> 16
        and.d   0xffff,$r13     ; checksum = checksum & 0xffff
-       add.d   $r9,$r13        ; checksum += r0
-       move.d  $r13,$r9        ; do the same again, maybe we got a carry last add
-       lsrq    16,$r9
-       and.d   0xffff,$r13
-       add.d   $r9,$r13
 
-_no_fold:
-       cmpq    2,$r12
+       subq    2, $r12
        blt     _no_words
-       nop
+       add.d   $r9,$r13        ; checksum += r0
 
        ;; copy and checksum the rest of the words
-
-       subq    2,$r12
-
 2:     ;; A failing userspace access for the read below will have this as PC.
 _wloop:        move.w  [$r10+],$r9
        addu.w  $r9,$r13
@@ -100,12 +74,9 @@ _wloop:     move.w  [$r10+],$r9
        bge     _wloop
        move.w  $r9,[$r11+]
 
-       addq    2,$r12
-
 _no_words:
-       ;; see if we have one odd byte more
-       cmpq    1,$r12
-       beq     _do_byte
+       addq    2,$r12
+       bne     _do_byte
        nop
        ret
        move.d  $r13,$r10
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to