Author: mjg
Date: Thu Nov 15 20:28:35 2018
New Revision: 340464
URL: https://svnweb.freebsd.org/changeset/base/340464

Log:
  amd64: sync up libc memset with the kernel version
  
  - tidy up memset to have rax set earlier for small sizes
  - finish the tail in memset with an overlapping store
  - align memset buffers to 16 bytes before using rep stos
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/lib/libc/amd64/string/memset.S

Modified: head/lib/libc/amd64/string/memset.S
==============================================================================
--- head/lib/libc/amd64/string/memset.S Thu Nov 15 20:20:39 2018        (r340463)
+++ head/lib/libc/amd64/string/memset.S Thu Nov 15 20:28:35 2018        (r340464)
@@ -31,12 +31,14 @@
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
+#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
 .macro MEMSET erms
-       movq    %rdi,%r9
+       movq    %rdi,%rax
        movq    %rdx,%rcx
        movzbq  %sil,%r8
-       movabs  $0x0101010101010101,%rax
-       imulq   %r8,%rax
+       movabs  $0x0101010101010101,%r10
+       imulq   %r8,%r10
 
        cmpq    $32,%rcx
        jb      1016f
@@ -45,10 +47,10 @@ __FBSDID("$FreeBSD$");
        ja      1256f
 
 1032:
-       movq    %rax,(%rdi)
-       movq    %rax,8(%rdi)
-       movq    %rax,16(%rdi)
-       movq    %rax,24(%rdi)
+       movq    %r10,(%rdi)
+       movq    %r10,8(%rdi)
+       movq    %r10,16(%rdi)
+       movq    %r10,24(%rdi)
        leaq    32(%rdi),%rdi
        subq    $32,%rcx
        cmpq    $32,%rcx
@@ -58,54 +60,72 @@ __FBSDID("$FreeBSD$");
 1016:
        cmpb    $16,%cl
        jl      1008f
-       movq    %rax,(%rdi)
-       movq    %rax,8(%rdi)
+       movq    %r10,(%rdi)
+       movq    %r10,8(%rdi)
        subb    $16,%cl
        jz      1000f
        leaq    16(%rdi),%rdi
 1008:
        cmpb    $8,%cl
        jl      1004f
-       movq    %rax,(%rdi)
+       movq    %r10,(%rdi)
        subb    $8,%cl
        jz      1000f
        leaq    8(%rdi),%rdi
 1004:
        cmpb    $4,%cl
        jl      1002f
-       movl    %eax,(%rdi)
+       movl    %r10d,(%rdi)
        subb    $4,%cl
        jz      1000f
        leaq    4(%rdi),%rdi
 1002:
        cmpb    $2,%cl
        jl      1001f
-       movw    %ax,(%rdi)
+       movw    %r10w,(%rdi)
        subb    $2,%cl
        jz      1000f
        leaq    2(%rdi),%rdi
 1001:
        cmpb    $1,%cl
        jl      1000f
-       movb    %al,(%rdi)
+       movb    %r10b,(%rdi)
 1000:
-       movq    %r9,%rax
        ret
-
+       ALIGN_TEXT
 1256:
+       movq    %rdi,%r9
+       movq    %r10,%rax
+       testl   $15,%edi
+       jnz     3f
+1:
 .if \erms == 1
        rep
        stosb
+       movq    %r9,%rax
 .else
+       movq    %rcx,%rdx
        shrq    $3,%rcx
        rep
        stosq
-       movq    %rdx,%rcx
-       andb    $7,%cl
-       jne     1004b
-.endif
        movq    %r9,%rax
+       andl    $7,%edx
+       jnz     2f
        ret
+2:
+       movq    %r10,-8(%rdi,%rdx)
+.endif
+       ret
+       ALIGN_TEXT
+3:
+       movq    %r10,(%rdi)
+       movq    %r10,8(%rdi)
+       movq    %rdi,%r8
+       andq    $15,%r8
+       leaq    -16(%rcx,%r8),%rcx
+       neg     %r8
+       leaq    16(%rdi,%r8),%rdi
+       jmp     1b
 .endm
 
 ENTRY(memset)
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to