Author: mjg
Date: Fri Nov 30 20:58:08 2018
New Revision: 341351
URL: https://svnweb.freebsd.org/changeset/base/341351

Log:
  amd64: handle small memmove buffers with overlapping stores
  
  Handling of sizes > 32 in the backward-copy case will be updated later.
  
  Reviewed by:  kib (kernel part)
  Sponsored by: The FreeBSD Foundation
  Differential Revision:        https://reviews.freebsd.org/D18387
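
The idea, sketched in C below (a hedged illustration, not part of the commit;
the name small_memmove is hypothetical): each size tier loads all of its
source bytes into temporaries before issuing any store, with the leading and
trailing halves overlapping for in-between sizes, so a single forward path is
correct for any overlap and no direction check is needed for n <= 32.

	#include <stdint.h>
	#include <string.h>

	static void
	small_memmove(unsigned char *d, const unsigned char *s, size_t n)
	{
		uint64_t q0, q1, q2, q3;
		uint32_t l0, l1;
		uint16_t w0, w1;

		if (n >= 16) {		/* 16..32: two overlapping 16-byte halves */
			memcpy(&q0, s, 8);
			memcpy(&q1, s + 8, 8);
			memcpy(&q2, s + n - 16, 8);
			memcpy(&q3, s + n - 8, 8);
			memcpy(d, &q0, 8);
			memcpy(d + 8, &q1, 8);
			memcpy(d + n - 16, &q2, 8);
			memcpy(d + n - 8, &q3, 8);
		} else if (n >= 8) {	/* 8..15: two overlapping quadwords */
			memcpy(&q0, s, 8);
			memcpy(&q1, s + n - 8, 8);
			memcpy(d, &q0, 8);
			memcpy(d + n - 8, &q1, 8);
		} else if (n >= 4) {	/* 4..7: two overlapping longwords */
			memcpy(&l0, s, 4);
			memcpy(&l1, s + n - 4, 4);
			memcpy(d, &l0, 4);
			memcpy(d + n - 4, &l1, 4);
		} else if (n >= 2) {	/* 2..3: two overlapping words */
			memcpy(&w0, s, 2);
			memcpy(&w1, s + n - 2, 2);
			memcpy(d, &w0, 2);
			memcpy(d + n - 2, &w1, 2);
		} else if (n == 1)
			*d = *s;
	}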

Modified:
  head/lib/libc/amd64/string/memmove.S
  head/sys/amd64/amd64/support.S

Modified: head/lib/libc/amd64/string/memmove.S
==============================================================================
--- head/lib/libc/amd64/string/memmove.S	Fri Nov 30 19:59:28 2018	(r341350)
+++ head/lib/libc/amd64/string/memmove.S	Fri Nov 30 20:58:08 2018	(r341351)
@@ -42,11 +42,19 @@ __FBSDID("$FreeBSD$");
  * rsi - source
  * rdx - count
  *
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10.
+ * It does not clobber rax or r11.
  */
 .macro MEMMOVE erms overlap begin end
        \begin
+
+       /*
+        * For sizes 0..32 all data is read before it is written, so there
+        * is no correctness issue with direction of copying.
+        */
+       cmpq    $32,%rcx
+       jbe     101632f
+
 .if \overlap == 1
        movq    %rdi,%r8
        subq    %rsi,%r8
@@ -54,13 +62,10 @@ __FBSDID("$FreeBSD$");
        jb      2f
 .endif
 
-       cmpq    $32,%rcx
-       jb      1016f
-
        cmpq    $256,%rcx
        ja      1256f
 
-1032:
+103200:
        movq    (%rsi),%rdx
        movq    %rdx,(%rdi)
        movq    8(%rsi),%rdx
@@ -73,56 +78,62 @@ __FBSDID("$FreeBSD$");
        leaq    32(%rdi),%rdi
        subq    $32,%rcx
        cmpq    $32,%rcx
-       jae     1032b
+       jae     103200b
        cmpb    $0,%cl
-       jne     1016f
+       jne     101632f
        \end
        ret
        ALIGN_TEXT
-1016:
+101632:
        cmpb    $16,%cl
-       jl      1008f
+       jl      100816f
        movq    (%rsi),%rdx
+       movq    8(%rsi),%r8
+       movq    -16(%rsi,%rcx),%r9
+       movq    -8(%rsi,%rcx),%r10
        movq    %rdx,(%rdi)
-       movq    8(%rsi),%rdx
-       movq    %rdx,8(%rdi)
-       subb    $16,%cl
-       jz      1000f
-       leaq    16(%rsi),%rsi
-       leaq    16(%rdi),%rdi
-1008:
+       movq    %r8,8(%rdi)
+       movq    %r9,-16(%rdi,%rcx)
+       movq    %r10,-8(%rdi,%rcx)
+       \end
+       ret
+       ALIGN_TEXT
+100816:
        cmpb    $8,%cl
-       jl      1004f
+       jl      100408f
        movq    (%rsi),%rdx
+       movq    -8(%rsi,%rcx),%r8
        movq    %rdx,(%rdi)
-       subb    $8,%cl
-       jz      1000f
-       leaq    8(%rsi),%rsi
-       leaq    8(%rdi),%rdi
-1004:
+       movq    %r8,-8(%rdi,%rcx)
+       \end
+       ret
+       ALIGN_TEXT
+100408:
        cmpb    $4,%cl
-       jl      1002f
+       jl      100204f
        movl    (%rsi),%edx
+       movl    -4(%rsi,%rcx),%r8d
        movl    %edx,(%rdi)
-       subb    $4,%cl
-       jz      1000f
-       leaq    4(%rsi),%rsi
-       leaq    4(%rdi),%rdi
-1002:
+       movl    %r8d,-4(%rdi,%rcx)
+       \end
+       ret
+       ALIGN_TEXT
+100204:
        cmpb    $2,%cl
-       jl      1001f
-       movw    (%rsi),%dx
+       jl      100001f
+       movzwl  (%rsi),%edx
+       movzwl  -2(%rsi,%rcx),%r8d
        movw    %dx,(%rdi)
-       subb    $2,%cl
-       jz      1000f
-       leaq    2(%rsi),%rsi
-       leaq    2(%rdi),%rdi
-1001:
+       movw    %r8w,-2(%rdi,%rcx)
+       \end
+       ret
+       ALIGN_TEXT
+100001:
        cmpb    $1,%cl
-       jl      1000f
+       jl      100000f
        movb    (%rsi),%dl
        movb    %dl,(%rdi)
-1000:
+100000:
        \end
        ret
 
@@ -136,8 +147,8 @@ __FBSDID("$FreeBSD$");
        rep
        movsq
        movq    %rdx,%rcx
-       andb    $7,%cl                         /* any bytes left? */
-       jne     1004b
+       andl    $7,%ecx                         /* any bytes left? */
+       jne     100408b
 .endif
        \end
        ret
@@ -246,6 +257,7 @@ __FBSDID("$FreeBSD$");
        ret
 .endif
 .endm
+
 
 .macro MEMMOVE_BEGIN
        movq    %rdi,%rax
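
A quick way to sanity-check the new small-size path (a hedged, self-contained
test, not part of the commit): compare memmove against a reference copy made
through a non-overlapping temporary, for every size 0..32 and every overlap
offset.

	#include <assert.h>
	#include <string.h>

	int
	main(void)
	{
		unsigned char buf[96], ref[96], tmp[32];
		size_t n, off, i;

		for (n = 0; n <= 32; n++) {
			for (off = 0; off + n <= sizeof(buf); off++) {
				for (i = 0; i < sizeof(buf); i++)
					buf[i] = ref[i] = (unsigned char)i;
				/* reference result via a temporary */
				memcpy(tmp, ref + 16, n);
				memcpy(ref + off, tmp, n);
				/* overlapping cases hit the new 0..32 tiers */
				memmove(buf + off, buf + 16, n);
				assert(memcmp(buf, ref, sizeof(buf)) == 0);
			}
		}
		return (0);
	}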

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Fri Nov 30 19:59:28 2018	(r341350)
+++ head/sys/amd64/amd64/support.S	Fri Nov 30 20:58:08 2018	(r341351)
@@ -205,11 +205,19 @@ END(memcmp)
  * rsi - source
  * rdx - count
  *
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10.
+ * It does not clobber rax or r11.
  */
 .macro MEMMOVE erms overlap begin end
        \begin
+
+       /*
+        * For sizes 0..32 all data is read before it is written, so there
+        * is no correctness issue with direction of copying.
+        */
+       cmpq    $32,%rcx
+       jbe     101632f
+
 .if \overlap == 1
        movq    %rdi,%r8
        subq    %rsi,%r8
@@ -217,13 +225,10 @@ END(memcmp)
        jb      2f
 .endif
 
-       cmpq    $32,%rcx
-       jb      1016f
-
        cmpq    $256,%rcx
        ja      1256f
 
-1032:
+103200:
        movq    (%rsi),%rdx
        movq    %rdx,(%rdi)
        movq    8(%rsi),%rdx
@@ -236,56 +241,62 @@ END(memcmp)
        leaq    32(%rdi),%rdi
        subq    $32,%rcx
        cmpq    $32,%rcx
-       jae     1032b
+       jae     103200b
        cmpb    $0,%cl
-       jne     1016f
+       jne     101632f
        \end
        ret
        ALIGN_TEXT
-1016:
+101632:
        cmpb    $16,%cl
-       jl      1008f
+       jl      100816f
        movq    (%rsi),%rdx
+       movq    8(%rsi),%r8
+       movq    -16(%rsi,%rcx),%r9
+       movq    -8(%rsi,%rcx),%r10
        movq    %rdx,(%rdi)
-       movq    8(%rsi),%rdx
-       movq    %rdx,8(%rdi)
-       subb    $16,%cl
-       jz      1000f
-       leaq    16(%rsi),%rsi
-       leaq    16(%rdi),%rdi
-1008:
+       movq    %r8,8(%rdi)
+       movq    %r9,-16(%rdi,%rcx)
+       movq    %r10,-8(%rdi,%rcx)
+       \end
+       ret
+       ALIGN_TEXT
+100816:
        cmpb    $8,%cl
-       jl      1004f
+       jl      100408f
        movq    (%rsi),%rdx
+       movq    -8(%rsi,%rcx),%r8
        movq    %rdx,(%rdi)
-       subb    $8,%cl
-       jz      1000f
-       leaq    8(%rsi),%rsi
-       leaq    8(%rdi),%rdi
-1004:
+       movq    %r8,-8(%rdi,%rcx)
+       \end
+       ret
+       ALIGN_TEXT
+100408:
        cmpb    $4,%cl
-       jl      1002f
+       jl      100204f
        movl    (%rsi),%edx
+       movl    -4(%rsi,%rcx),%r8d
        movl    %edx,(%rdi)
-       subb    $4,%cl
-       jz      1000f
-       leaq    4(%rsi),%rsi
-       leaq    4(%rdi),%rdi
-1002:
+       movl    %r8d,-4(%rdi,%rcx)
+       \end
+       ret
+       ALIGN_TEXT
+100204:
        cmpb    $2,%cl
-       jl      1001f
-       movw    (%rsi),%dx
+       jl      100001f
+       movzwl  (%rsi),%edx
+       movzwl  -2(%rsi,%rcx),%r8d
        movw    %dx,(%rdi)
-       subb    $2,%cl
-       jz      1000f
-       leaq    2(%rsi),%rsi
-       leaq    2(%rdi),%rdi
-1001:
+       movw    %r8w,-2(%rdi,%rcx)
+       \end
+       ret
+       ALIGN_TEXT
+100001:
        cmpb    $1,%cl
-       jl      1000f
+       jl      100000f
        movb    (%rsi),%dl
        movb    %dl,(%rdi)
-1000:
+100000:
        \end
        ret
 
@@ -299,8 +310,8 @@ END(memcmp)
        rep
        movsq
        movq    %rdx,%rcx
-       andb    $7,%cl                         /* any bytes left? */
-       jne     1004b
+       andl    $7,%ecx                         /* any bytes left? */
+       jne     100408b
 .endif
        \end
        ret
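
For completeness, the non-ERMS bulk path changed in the last hunk copies
whole quadwords with rep movsq and then re-enters the small-size tiers for
the 0..7 byte tail ("andl $7,%ecx; jne 100408b"). Roughly, in C (a hedged
sketch; bulk_forward is a hypothetical name, and the temporary stands in for
the overlapping loads of the 100408 tier):

	#include <stdint.h>
	#include <string.h>

	static void
	bulk_forward(unsigned char *d, const unsigned char *s, size_t n)
	{
		uint64_t q;
		size_t quads;

		for (quads = n >> 3; quads != 0; quads--) {
			memcpy(&q, s, 8);	/* stands in for rep movsq */
			memcpy(d, &q, 8);
			s += 8;
			d += 8;
		}
		n &= 7;				/* andl $7,%ecx */
		if (n != 0) {			/* jne 100408b */
			unsigned char tail[8];

			memcpy(tail, s, n);	/* all loads before any store */
			memcpy(d, tail, n);
		}
	}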