Author: jchandra
Date: Sat Sep  7 16:31:30 2013
New Revision: 255367
URL: http://svnweb.freebsd.org/changeset/base/255367

Log:
  Use a better version of memcpy/bcopy for mips kernel.
  
  Use a variant of mips libc memcpy for kernel. This implementation uses
  64-bit operations when compiled for 64-bit, and is significantly faster
  in that case.
  
  Submitted by: Tanmay Jagdale <[email protected]>

Added:
  head/sys/mips/mips/bcopy.S   (contents, props changed)
Modified:
  head/sys/conf/files.mips
  head/sys/mips/mips/support.S

Modified: head/sys/conf/files.mips
==============================================================================
--- head/sys/conf/files.mips    Sat Sep  7 16:16:57 2013        (r255366)
+++ head/sys/conf/files.mips    Sat Sep  7 16:31:30 2013        (r255367)
@@ -38,6 +38,7 @@ mips/mips/stack_machdep.c             optional        ddb 
 mips/mips/stdatomic.c                  standard \
        compile-with "${NORMAL_C:N-Wmissing-prototypes}"
 mips/mips/support.S                    standard
+mips/mips/bcopy.S                      standard
 mips/mips/swtch.S                      standard
 mips/mips/sys_machdep.c                        standard
 mips/mips/tlb.c                                standard

Added: head/sys/mips/mips/bcopy.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/mips/mips/bcopy.S  Sat Sep  7 16:31:30 2013        (r255367)
@@ -0,0 +1,286 @@
+/*     $NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $   */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1993 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  [email protected]
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+/*
+ *     File:   mips_bcopy.s
+ *     Author: Chris Maeda
+ *     Date:   June 1993
+ *
+ *     Fast copy routine.  Derived from aligned_block_copy.
+ */
+
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#include <machine/endian.h>
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+       ASMSTR("from: @(#)mips_bcopy.s  2.2 CMU 18/06/93")
+#else
+       ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#ifdef __ABICALLS__
+       .abicalls
+#endif
+
+/*
+ *     bcopy(caddr_t src, caddr_t dst, unsigned int len)
+ *     (the memcpy entry swaps a0 and a1, then falls into bcopy)
+ *     a0      src address
+ *     a1      dst address
+ *     a2      length
+ */
+
+#define        SRCREG  a0
+#define        DSTREG  a1
+#define        SIZEREG a2
+
+LEAF(memcpy)
+       .set    noat                    # AT is used explicitly as scratch below
+       .set    noreorder               # branch delay slots are filled by hand
+
+       move    v0, a0                  # v0 = first memcpy argument; not written again below
+       move    a0, a1                  # a0 = src, as bcopy expects
+       move    a1, v0                  # a1 = dst
+
+ALEAF(bcopy)
+ALEAF(ovbcopy)
+       /*
+        *      Copy forwards only when src >= dst; otherwise copy from the end (6f).
+        */
+       sltu    t0,SRCREG,DSTREG        # t0 == SRCREG < DSTREG
+       bne     t0,zero,6f              # copy backwards (the andi below fills the delay slot)
+
+       /*
+        *      There are four alignment cases (with frequency)
+        *      (Based on measurements taken with a DECstation 5000/200
+        *       inside a Mach kernel.)
+        *
+        *      aligned   -> aligned            (mostly)
+        *      unaligned -> aligned            (sometimes)
+        *      aligned,unaligned -> unaligned  (almost never)
+        *
+        *      Note that we could add another case that checks if
+        *      the destination and source are unaligned but the
+        *      copy is alignable.  eg if src and dest are both
+        *      on a halfword boundary.
+        */
+       andi            t1,DSTREG,(SZREG-1)     # get last bits of dest
+       bne             t1,zero,3f              # dest unaligned: bytecopy everything
+       andi            t0,SRCREG,(SZREG-1)     # (delay slot) get last bits of src
+       bne             t0,zero,5f              # src unaligned, dest aligned
+
+       /*
+        *      Forward aligned->aligned copy, 8 registers (8*SZREG bytes) at a time.
+        */
+98:
+       li              AT,-(SZREG*8)           # mask for multiples of 8*SZREG (fills the delay slot above)
+       and             t0,SIZEREG,AT           # count truncated to multiples
+       PTR_ADDU        a3,SRCREG,t0            # run fast loop up to this addr
+       sltu            AT,SRCREG,a3            # any work to do?
+       beq             AT,zero,2f
+       PTR_SUBU        SIZEREG,t0              # (delay slot) SIZEREG = bytes left after fast loop
+
+       /*
+        *      loop body: two groups of four loads followed by four stores
+        */
+1:     # cp
+       REG_L           t3,(0*SZREG)(SRCREG)
+       REG_L           v1,(1*SZREG)(SRCREG)
+       REG_L           t0,(2*SZREG)(SRCREG)
+       REG_L           t1,(3*SZREG)(SRCREG)
+       PTR_ADDU        SRCREG,SZREG*8          # advance src; second group uses negative offsets
+       REG_S           t3,(0*SZREG)(DSTREG)
+       REG_S           v1,(1*SZREG)(DSTREG)
+       REG_S           t0,(2*SZREG)(DSTREG)
+       REG_S           t1,(3*SZREG)(DSTREG)
+       REG_L           t1,(-1*SZREG)(SRCREG)
+       REG_L           t0,(-2*SZREG)(SRCREG)
+       REG_L           v1,(-3*SZREG)(SRCREG)
+       REG_L           t3,(-4*SZREG)(SRCREG)
+       PTR_ADDU        DSTREG,SZREG*8          # advance dst; remaining stores use negative offsets
+       REG_S           t1,(-1*SZREG)(DSTREG)
+       REG_S           t0,(-2*SZREG)(DSTREG)
+       REG_S           v1,(-3*SZREG)(DSTREG)
+       bne             SRCREG,a3,1b
+       REG_S           t3,(-4*SZREG)(DSTREG)   # (delay slot) last store of the group
+
+       /*
+        *      Copy one register (SZREG bytes) at a time, no loop unrolling.
+        */
+2:     # wordcopy
+       andi            t2,SIZEREG,(SZREG-1)    # t2 = byte count mod SZREG
+       PTR_SUBU        t2,SIZEREG,t2           # t2 = words to copy * SZREG
+       beq             t2,zero,3f
+       PTR_ADDU        t0,SRCREG,t2            # (delay slot) stop at t0
+       PTR_SUBU        SIZEREG,SIZEREG,t2      # SIZEREG = trailing byte count
+1:
+       REG_L           t3,0(SRCREG)
+       PTR_ADDU        SRCREG,SZREG
+       REG_S           t3,0(DSTREG)
+       bne             SRCREG,t0,1b
+       PTR_ADDU        DSTREG,SZREG            # (delay slot)
+
+3:     # bytecopy
+       beq             SIZEREG,zero,4f         # nothing left to do?
+       nop
+1:
+       lb              t3,0(SRCREG)
+       PTR_ADDU        SRCREG,1
+       sb              t3,0(DSTREG)
+       PTR_SUBU        SIZEREG,1
+       bgtz            SIZEREG,1b              # loop while bytes remain
+       PTR_ADDU        DSTREG,1                # (delay slot)
+
+4:     # copydone
+       j       ra
+       nop
+
+       /*
+        *      Forward copy from unaligned source to aligned dest.
+        */
+5:     # destaligned
+       andi            t0,SIZEREG,(SZREG-1)    # t0 = bytecount mod SZREG
+       PTR_SUBU        a3,SIZEREG,t0           # bytes to transfer as whole registers
+       beq             a3,zero,3b              # less than one register: bytecopy
+       nop
+       move            SIZEREG,t0              # this many to do after we are done
+       PTR_ADDU        a3,SRCREG,a3            # stop point
+
+1:
+       REG_LHI         t3,0(SRCREG)            # unaligned load, first part
+       REG_LLO         t3,SZREG-1(SRCREG)      # second part into the same register
+       PTR_ADDI        SRCREG,SZREG
+       REG_S           t3,0(DSTREG)
+       bne             SRCREG,a3,1b
+       PTR_ADDI        DSTREG,SZREG            # (delay slot)
+
+       b               3b                      # finish the remainder bytewise
+       nop
+
+6:     # backcopy -- based on above
+       PTR_ADDU        SRCREG,SIZEREG          # SRCREG = src + len (one past the end)
+       PTR_ADDU        DSTREG,SIZEREG          # DSTREG = dst + len
+       andi            t1,DSTREG,SZREG-1       # get low bits of dest end
+       bne             t1,zero,3f              # dest end unaligned: bytecopy everything
+       andi            t0,SRCREG,SZREG-1       # (delay slot) get low bits of src end
+       bne             t0,zero,5f              # src end unaligned, dest end aligned
+
+       /*
+        *      Backward aligned->aligned copy, 8*SZREG bytes at a time.
+        */
+       li              AT,(-8*SZREG)           # mask for multiples of 8*SZREG (fills the delay slot above)
+       and             t0,SIZEREG,AT           # count truncated to multiple of 8*SZREG
+       beq             t0,zero,2f              # any work to do?
+       PTR_SUBU        SIZEREG,t0              # (delay slot) SIZEREG = bytes left after fast loop
+       PTR_SUBU        a3,SRCREG,t0            # run fast loop down to this addr
+
+       /*
+        *      loop body: two groups of four loads followed by four stores
+        */
+1:     # cp
+       REG_L           t3,(-4*SZREG)(SRCREG)
+       REG_L           v1,(-3*SZREG)(SRCREG)
+       REG_L           t0,(-2*SZREG)(SRCREG)
+       REG_L           t1,(-1*SZREG)(SRCREG)
+       PTR_SUBU        SRCREG,8*SZREG          # step src down; second group uses positive offsets
+       REG_S           t3,(-4*SZREG)(DSTREG)
+       REG_S           v1,(-3*SZREG)(DSTREG)
+       REG_S           t0,(-2*SZREG)(DSTREG)
+       REG_S           t1,(-1*SZREG)(DSTREG)
+       REG_L           t1,(3*SZREG)(SRCREG)
+       REG_L           t0,(2*SZREG)(SRCREG)
+       REG_L           v1,(1*SZREG)(SRCREG)
+       REG_L           t3,(0*SZREG)(SRCREG)
+       PTR_SUBU        DSTREG,8*SZREG          # step dst down; remaining stores use positive offsets
+       REG_S           t1,(3*SZREG)(DSTREG)
+       REG_S           t0,(2*SZREG)(DSTREG)
+       REG_S           v1,(1*SZREG)(DSTREG)
+       bne             SRCREG,a3,1b
+       REG_S           t3,(0*SZREG)(DSTREG)    # (delay slot) last store of the group
+
+       /*
+        *      Copy one register (SZREG bytes) at a time, no loop unrolling.
+        */
+2:     # wordcopy
+       andi            t2,SIZEREG,SZREG-1      # t2 = byte count mod SZREG
+       PTR_SUBU        t2,SIZEREG,t2           # t2 = words to copy * SZREG
+       beq             t2,zero,3f
+       PTR_SUBU        t0,SRCREG,t2            # (delay slot) stop at t0
+       PTR_SUBU        SIZEREG,SIZEREG,t2      # SIZEREG = leading byte count still to copy
+1:
+       REG_L           t3,-SZREG(SRCREG)
+       PTR_SUBU        SRCREG,SZREG
+       REG_S           t3,-SZREG(DSTREG)
+       bne             SRCREG,t0,1b
+       PTR_SUBU        DSTREG,SZREG            # (delay slot)
+
+3:     # bytecopy
+       beq             SIZEREG,zero,4f         # nothing left to do?
+       nop
+1:
+       lb              t3,-1(SRCREG)
+       PTR_SUBU        SRCREG,1
+       sb              t3,-1(DSTREG)
+       PTR_SUBU        SIZEREG,1
+       bgtz            SIZEREG,1b              # loop while bytes remain
+       PTR_SUBU        DSTREG,1                # (delay slot)
+
+4:     # copydone
+       j       ra
+       nop
+
+       /*
+        *      Backward copy from unaligned source to aligned dest.
+        */
+5:     # destaligned
+       andi            t0,SIZEREG,SZREG-1      # t0 = bytecount mod SZREG
+       PTR_SUBU        a3,SIZEREG,t0           # bytes to transfer as whole registers
+       beq             a3,zero,3b              # less than one register: bytecopy
+       nop
+       move            SIZEREG,t0              # this many to do after we are done
+       PTR_SUBU        a3,SRCREG,a3            # stop point
+
+1:
+       REG_LHI         t3,-SZREG(SRCREG)       # unaligned load, first part
+       REG_LLO         t3,-1(SRCREG)           # second part into the same register
+       PTR_SUBU        SRCREG,SZREG
+       REG_S           t3,-SZREG(DSTREG)
+       bne             SRCREG,a3,1b
+       PTR_SUBU        DSTREG,SZREG            # (delay slot)
+
+       b               3b                      # finish the remainder bytewise
+       nop
+
+       .set    reorder
+       .set    at
+END(memcpy)

Modified: head/sys/mips/mips/support.S
==============================================================================
--- head/sys/mips/mips/support.S        Sat Sep  7 16:16:57 2013        (r255366)
+++ head/sys/mips/mips/support.S        Sat Sep  7 16:31:30 2013        (r255367)
@@ -507,98 +507,6 @@ LEAF(fswintrberr)
 END(fswintrberr)
 
 /*
- * memcpy(to, from, len)
- * {ov}bcopy(from, to, len)
- */
-LEAF(memcpy)
-       .set    noreorder
-       move    v0, a0                  # swap from and to
-       move    a0, a1
-       move    a1, v0
-ALEAF(bcopy)
-ALEAF(ovbcopy)
-       .set    noreorder
-       PTR_ADDU        t0, a0, a2              # t0 = end of s1 region
-       sltu    t1, a1, t0
-       sltu    t2, a0, a1
-       and     t1, t1, t2              # t1 = true if from < to < (from+len)
-       beq     t1, zero, forward       # non overlapping, do forward copy
-       slt     t2, a2, 12              # check for small copy
-
-       ble     a2, zero, 2f
-       PTR_ADDU        t1, a1, a2              # t1 = end of to region
-1:
-       lb      v1, -1(t0)              # copy bytes backwards,
-       PTR_SUBU        t0, t0, 1               #   doesnt happen often so do slow way
-       PTR_SUBU        t1, t1, 1
-       bne     t0, a0, 1b
-       sb      v1, 0(t1)
-2:
-       j       ra
-       nop
-forward:
-       bne     t2, zero, smallcpy      # do a small bcopy
-       xor     v1, a0, a1              # compare low two bits of addresses
-       and     v1, v1, 3
-       PTR_SUBU        a3, zero, a1            # compute # bytes to word align address
-       beq     v1, zero, aligned       # addresses can be word aligned
-       and     a3, a3, 3
-
-       beq     a3, zero, 1f
-       PTR_SUBU        a2, a2, a3              # subtract from remaining count
-       LWHI    v1, 0(a0)               # get next 4 bytes (unaligned)
-       LWLO    v1, 3(a0)
-       PTR_ADDU        a0, a0, a3
-       SWHI    v1, 0(a1)               # store 1, 2, or 3 bytes to align a1
-       PTR_ADDU        a1, a1, a3
-1:
-       and     v1, a2, 3               # compute number of words left
-       PTR_SUBU        a3, a2, v1
-       move    a2, v1
-       PTR_ADDU        a3, a3, a0              # compute ending address
-2:
-       LWHI    v1, 0(a0)               # copy words a0 unaligned, a1 aligned
-       LWLO    v1, 3(a0)
-       PTR_ADDU        a0, a0, 4
-       sw      v1, 0(a1)
-       PTR_ADDU        a1, a1, 4
-       bne     a0, a3, 2b
-       nop                             # We have to do this mmu-bug.
-       b       smallcpy
-       nop
-aligned:
-       beq     a3, zero, 1f
-       PTR_SUBU        a2, a2, a3              # subtract from remaining count
-       LWHI    v1, 0(a0)               # copy 1, 2, or 3 bytes to align
-       PTR_ADDU        a0, a0, a3
-       SWHI    v1, 0(a1)
-       PTR_ADDU        a1, a1, a3
-1:
-       and     v1, a2, 3               # compute number of whole words left
-       PTR_SUBU        a3, a2, v1
-       move    a2, v1
-       PTR_ADDU        a3, a3, a0              # compute ending address
-2:
-       lw      v1, 0(a0)               # copy words
-       PTR_ADDU        a0, a0, 4
-       sw      v1, 0(a1)
-       bne     a0, a3, 2b
-       PTR_ADDU        a1, a1, 4
-smallcpy:
-       ble     a2, zero, 2f
-       PTR_ADDU        a3, a2, a0              # compute ending address
-1:
-       lbu     v1, 0(a0)               # copy bytes
-       PTR_ADDU        a0, a0, 1
-       sb      v1, 0(a1)
-       bne     a0, a3, 1b
-       PTR_ADDU        a1, a1, 1          # MMU BUG ? can not do -1(a1) at 0x80000000!!
-2:
-       j       ra
-       nop
-END(memcpy)
-
-/*
  * memset(void *s1, int c, int len)
  * NetBSD: memset.S,v 1.3 2001/10/16 15:40:53 uch Exp
  */
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to