Author: mjg
Date: Sat Jul 25 00:24:11 2020
New Revision: 363505
URL: https://svnweb.freebsd.org/changeset/base/363505

Log:
  MFC r357208,r357309,r357239,r357310
  
      amd64: revamp memcmp
      amd64: speed up failing case for memcmp
      amd64: sync up libc memcmp with the kernel version (r357208)
      amd64: sync up libc memcmp with the kernel version (r357309)

Modified:
  stable/12/lib/libc/amd64/string/memcmp.S
  stable/12/sys/amd64/amd64/support.S
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/lib/libc/amd64/string/memcmp.S
==============================================================================
--- stable/12/lib/libc/amd64/string/memcmp.S    Sat Jul 25 00:03:23 2020        (r363504)
+++ stable/12/lib/libc/amd64/string/memcmp.S    Sat Jul 25 00:24:11 2020        (r363505)
@@ -31,91 +31,176 @@
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
+#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
 ENTRY(memcmp)
-       cmpq    $16,%rdx
-       jae     5f
-1:
-       testq   %rdx,%rdx
-       je      3f
-       xorl    %ecx,%ecx
-2:
-       movzbl  (%rdi,%rcx,1),%eax
-       movzbl  (%rsi,%rcx,1),%r8d
-       cmpb    %r8b,%al
-       jne     4f
-       addq    $1,%rcx
-       cmpq    %rcx,%rdx
-       jz      3f
-       movzbl  (%rdi,%rcx,1),%eax
-       movzbl  (%rsi,%rcx,1),%r8d
-       cmpb    %r8b,%al
-       jne     4f
-       addq    $1,%rcx
-       cmpq    %rcx,%rdx
-       jz      3f
-       movzbl  (%rdi,%rcx,1),%eax
-       movzbl  (%rsi,%rcx,1),%r8d
-       cmpb    %r8b,%al
-       jne     4f
-       addq    $1,%rcx
-       cmpq    %rcx,%rdx
-       jz      3f
-       movzbl  (%rdi,%rcx,1),%eax
-       movzbl  (%rsi,%rcx,1),%r8d
-       cmpb    %r8b,%al
-       jne     4f
-       addq    $1,%rcx
-       cmpq    %rcx,%rdx
-       jne     2b
-3:
        xorl    %eax,%eax
+10:    /* dispatch on the length in %rdx; %eax is 0 here, also when re-entered for the loop tail */
+       cmpq    $16,%rdx
+       ja      101632f         /* more than 16 bytes */
+
+100816:
+       cmpb    $8,%dl          /* %rdx <= 16 here, so %dl holds the whole length */
+       jl      100408f         /* fewer than 8 bytes */
+       movq    (%rdi),%r8      /* 8..16 bytes: compare the first 8 bytes ... */
+       movq    (%rsi),%r9
+       cmpq    %r8,%r9
+       jne     80f
+       movq    -8(%rdi,%rdx),%r8       /* ... then the last 8 bytes (may overlap the first 8) */
+       movq    -8(%rsi,%rdx),%r9
+       cmpq    %r8,%r9
+       jne     10081608f
        ret
-4:
+100408:
+       cmpb    $4,%dl
+       jl      100204f         /* fewer than 4 bytes */
+       movl    (%rdi),%r8d     /* 4..7 bytes: compare the first 4 bytes ... */
+       movl    (%rsi),%r9d
+       cmpl    %r8d,%r9d
+       jne     80f
+       movl    -4(%rdi,%rdx),%r8d      /* ... then the last 4 bytes (may overlap the first 4) */
+       movl    -4(%rsi,%rdx),%r9d
+       cmpl    %r8d,%r9d
+       jne     10040804f
+       ret
+100204:
+       cmpb    $2,%dl
+       jl      100001f         /* fewer than 2 bytes */
+       movzwl  (%rdi),%r8d     /* 2..3 bytes: compare the first 2 bytes ... */
+       movzwl  (%rsi),%r9d
+       cmpl    %r8d,%r9d
+       jne     1f
+       movzwl  -2(%rdi,%rdx),%r8d      /* ... then the last 2 bytes */
+       movzwl  -2(%rsi,%rdx),%r9d
+       cmpl    %r8d,%r9d
+       jne     1f              /* pointers left as-is: the differing byte is within the first 3, which 1: scans */
+       ret
+100001:
+       cmpb    $1,%dl
+       jl      100000f         /* length 0: return with %eax still 0 */
+       movzbl  (%rdi),%eax     /* exactly 1 byte: the result is the byte difference */
+       movzbl  (%rsi),%r8d
        subl    %r8d,%eax
+100000:
        ret
-5:
+ALIGN_TEXT
+101632:        /* more than 16 bytes */
        cmpq    $32,%rdx
-       jae     7f
-6:
-       /*
-        * 8 bytes
-        */
+       ja      103200f         /* more than 32 bytes: use the 32-byte loop */
        movq    (%rdi),%r8
        movq    (%rsi),%r9
        cmpq    %r8,%r9
-       jne     1b
-       leaq    8(%rdi),%rdi
-       leaq    8(%rsi),%rsi
-       subq    $8,%rdx
-       cmpq    $8,%rdx
-       jae     6b
-       jl      1b
-       jmp     3b
-7:
-       /*
-        * 32 bytes
-        */
-       movq    (%rsi),%r8
+       jne     80f             /* mismatch in bytes 0..7 */
+       movq    8(%rdi),%r8
        movq    8(%rsi),%r9
-       subq    (%rdi),%r8
-       subq    8(%rdi),%r9
-       or      %r8,%r9
-       jnz     1b
+       cmpq    %r8,%r9
+       jne     10163208f       /* mismatch in bytes 8..15 */
+       movq    -16(%rdi,%rdx),%r8      /* last 16 bytes, which may overlap the first 16 */
+       movq    -16(%rsi,%rdx),%r9
+       cmpq    %r8,%r9
+       jne     10163216f
+       movq    -8(%rdi,%rdx),%r8
+       movq    -8(%rsi,%rdx),%r9
+       cmpq    %r8,%r9
+       jne     10163224f
+       ret
+ALIGN_TEXT
+103200:        /* main loop: 32 bytes per iteration, one branch per 16 bytes */
+       movq    (%rdi),%r8
+       movq    8(%rdi),%r9
+       subq    (%rsi),%r8
+       subq    8(%rsi),%r9
+       orq     %r8,%r9         /* non-zero iff bytes 0..15 differ */
+       jnz     10320000f
 
-       movq    16(%rsi),%r8
-       movq    24(%rsi),%r9
-       subq    16(%rdi),%r8
-       subq    24(%rdi),%r9
-       or      %r8,%r9
-       jnz     1b
+       movq    16(%rdi),%r8
+       movq    24(%rdi),%r9
+       subq    16(%rsi),%r8
+       subq    24(%rsi),%r9
+       orq     %r8,%r9         /* non-zero iff bytes 16..31 differ */
+       jnz     10320016f
 
        leaq    32(%rdi),%rdi
        leaq    32(%rsi),%rsi
        subq    $32,%rdx
        cmpq    $32,%rdx
-       jae     7b
-       jnz     1b
-       jmp     3b
+       jae     103200b         /* at least 32 bytes left: go around again */
+       cmpb    $0,%dl          /* fewer than 32 bytes left, so %dl holds the whole remainder */
+       jne     10b             /* 1..31 bytes left: re-dispatch on the tail */
+       ret                     /* nothing left: buffers are equal, %eax is 0 */
+
+/*
+ * A mismatch was found.
+ *
+ * Before computing the result, narrow down the range (16 -> 8 -> 4 bytes).
+ */
+ALIGN_TEXT
+10320016:      /* mismatch in bytes 16..31 of the current 32-byte chunk */
+       leaq    16(%rdi),%rdi
+       leaq    16(%rsi),%rsi
+10320000:      /* mismatch within the 16 bytes at (%rdi): pick the 8-byte half */
+       movq    (%rdi),%r8
+       movq    (%rsi),%r9
+       cmpq    %r8,%r9
+       jne     80f
+       leaq    8(%rdi),%rdi
+       leaq    8(%rsi),%rsi
+       jmp     80f
+ALIGN_TEXT
+10081608:      /* mismatch in the last 8 bytes */
+10163224:
+       leaq    -8(%rdi,%rdx),%rdi
+       leaq    -8(%rsi,%rdx),%rsi
+       jmp     80f
+ALIGN_TEXT
+10163216:      /* mismatch in the 8 bytes starting 16 bytes before the end */
+       leaq    -16(%rdi,%rdx),%rdi
+       leaq    -16(%rsi,%rdx),%rsi
+       jmp     80f
+ALIGN_TEXT
+10163208:      /* mismatch in bytes 8..15 */
+       leaq    8(%rdi),%rdi
+       leaq    8(%rsi),%rsi
+       jmp     80f
+ALIGN_TEXT
+10040804:      /* mismatch in the last 4 bytes */
+       leaq    -4(%rdi,%rdx),%rdi
+       leaq    -4(%rsi,%rdx),%rsi
+       jmp     1f              /* already narrowed to 4 bytes */
+
+ALIGN_TEXT
+80:    /* mismatch within the 8 bytes at (%rdi): pick the 4-byte half */
+       movl    (%rdi),%r8d
+       movl    (%rsi),%r9d
+       cmpl    %r8d,%r9d
+       jne     1f
+       leaq    4(%rdi),%rdi
+       leaq    4(%rsi),%rsi
+
+/*
+ * The mismatch is within the next (up to) 4 bytes; find the first differing byte.
+ */
+1:
+       movzbl  (%rdi),%eax
+       movzbl  (%rsi),%r8d
+       cmpb    %r8b,%al
+       jne     2f
+
+       movzbl  1(%rdi),%eax
+       movzbl  1(%rsi),%r8d
+       cmpb    %r8b,%al
+       jne     2f
+
+       movzbl  2(%rdi),%eax
+       movzbl  2(%rsi),%r8d
+       cmpb    %r8b,%al
+       jne     2f
+
+       movzbl  3(%rdi),%eax    /* last candidate: loaded without a compare */
+       movzbl  3(%rsi),%r8d
+2:     /* %eax and %r8d hold the zero-extended bytes at the mismatch */
+       subl    %r8d,%eax       /* return value: their difference */
+       ret
 END(memcmp)
 
        .section .note.GNU-stack,"",%progbits

Modified: stable/12/sys/amd64/amd64/support.S
==============================================================================
--- stable/12/sys/amd64/amd64/support.S Sat Jul 25 00:03:23 2020        (r363504)
+++ stable/12/sys/amd64/amd64/support.S Sat Jul 25 00:24:11 2020        (r363505)
@@ -107,96 +107,185 @@ END(sse2_pagezero)
 
 /*
  * memcmpy(b1, b2, len)
- *        rdi,rsi,len
+ *        rdi,rsi,rdx
  */
 ENTRY(memcmp)
        PUSH_FRAME_POINTER
-       cmpq    $16,%rdx
-       jae     5f
-1:
-       testq   %rdx,%rdx
-       je      3f
-       xorl    %ecx,%ecx
-2:
-       movzbl  (%rdi,%rcx,1),%eax
-       movzbl  (%rsi,%rcx,1),%r8d
-       cmpb    %r8b,%al
-       jne     4f
-       addq    $1,%rcx
-       cmpq    %rcx,%rdx
-       jz      3f
-       movzbl  (%rdi,%rcx,1),%eax
-       movzbl  (%rsi,%rcx,1),%r8d
-       cmpb    %r8b,%al
-       jne     4f
-       addq    $1,%rcx
-       cmpq    %rcx,%rdx
-       jz      3f
-       movzbl  (%rdi,%rcx,1),%eax
-       movzbl  (%rsi,%rcx,1),%r8d
-       cmpb    %r8b,%al
-       jne     4f
-       addq    $1,%rcx
-       cmpq    %rcx,%rdx
-       jz      3f
-       movzbl  (%rdi,%rcx,1),%eax
-       movzbl  (%rsi,%rcx,1),%r8d
-       cmpb    %r8b,%al
-       jne     4f
-       addq    $1,%rcx
-       cmpq    %rcx,%rdx
-       jne     2b
-3:
+
        xorl    %eax,%eax
+10:    /* dispatch on the length in %rdx; %eax is 0 here, also when re-entered for the loop tail */
+       cmpq    $16,%rdx
+       ja      101632f         /* more than 16 bytes */
+
+100816:
+       cmpb    $8,%dl          /* %rdx <= 16 here, so %dl holds the whole length */
+       jl      100408f         /* fewer than 8 bytes */
+       movq    (%rdi),%r8      /* 8..16 bytes: compare the first 8 bytes ... */
+       movq    (%rsi),%r9
+       cmpq    %r8,%r9
+       jne     80f
+       movq    -8(%rdi,%rdx),%r8       /* ... then the last 8 bytes (may overlap the first 8) */
+       movq    -8(%rsi,%rdx),%r9
+       cmpq    %r8,%r9
+       jne     10081608f
        POP_FRAME_POINTER
        ret
-4:
+100408:
+       cmpb    $4,%dl
+       jl      100204f         /* fewer than 4 bytes */
+       movl    (%rdi),%r8d     /* 4..7 bytes: compare the first 4 bytes ... */
+       movl    (%rsi),%r9d
+       cmpl    %r8d,%r9d
+       jne     80f
+       movl    -4(%rdi,%rdx),%r8d      /* ... then the last 4 bytes (may overlap the first 4) */
+       movl    -4(%rsi,%rdx),%r9d
+       cmpl    %r8d,%r9d
+       jne     10040804f
+       POP_FRAME_POINTER
+       ret
+100204:
+       cmpb    $2,%dl
+       jl      100001f         /* fewer than 2 bytes */
+       movzwl  (%rdi),%r8d     /* 2..3 bytes: compare the first 2 bytes ... */
+       movzwl  (%rsi),%r9d
+       cmpl    %r8d,%r9d
+       jne     1f
+       movzwl  -2(%rdi,%rdx),%r8d      /* ... then the last 2 bytes */
+       movzwl  -2(%rsi,%rdx),%r9d
+       cmpl    %r8d,%r9d
+       jne     1f              /* pointers left as-is: the differing byte is within the first 3, which 1: scans */
+       POP_FRAME_POINTER
+       ret
+100001:
+       cmpb    $1,%dl
+       jl      100000f         /* length 0: return with %eax still 0 */
+       movzbl  (%rdi),%eax     /* exactly 1 byte: the result is the byte difference */
+       movzbl  (%rsi),%r8d
        subl    %r8d,%eax
+100000:
        POP_FRAME_POINTER
        ret
-5:
+ALIGN_TEXT
+101632:        /* more than 16 bytes */
        cmpq    $32,%rdx
-       jae     7f
-6:
-       /*
-        * 8 bytes
-        */
-       movq    (%rdi),%r8
-       movq    (%rsi),%r9
-       cmpq    %r8,%r9
-       jne     1b
+       ja      103200f         /* more than 32 bytes: use the 32-byte loop */
+       movq    (%rdi),%r8      /* 17..32 bytes: first 16, then last 16, as 8-byte compares */
+       movq    (%rsi),%r9
+       cmpq    %r8,%r9
+       jne     80f             /* mismatch in bytes 0..7 */
+       movq    8(%rdi),%r8
+       movq    8(%rsi),%r9
+       cmpq    %r8,%r9
+       jne     10163208f       /* mismatch in bytes 8..15 */
+       movq    -16(%rdi,%rdx),%r8      /* last 16 bytes, which may overlap the first 16 */
+       movq    -16(%rsi,%rdx),%r9
+       cmpq    %r8,%r9
+       jne     10163216f
+       movq    -8(%rdi,%rdx),%r8
+       movq    -8(%rsi,%rdx),%r9
+       cmpq    %r8,%r9
+       jne     10163224f
+       POP_FRAME_POINTER
+       ret
+ALIGN_TEXT
+103200:        /* main loop: 32 bytes per iteration, one branch per 16 bytes */
+       movq    (%rdi),%r8
+       movq    8(%rdi),%r9
+       subq    (%rsi),%r8
+       subq    8(%rsi),%r9
+       orq     %r8,%r9         /* non-zero iff bytes 0..15 differ */
+       jnz     10320000f
+
+       movq    16(%rdi),%r8
+       movq    24(%rdi),%r9
+       subq    16(%rsi),%r8
+       subq    24(%rsi),%r9
+       orq     %r8,%r9         /* non-zero iff bytes 16..31 differ */
+       jnz     10320016f
+
+       leaq    32(%rdi),%rdi   /* both halves equal: advance past this chunk */
+       leaq    32(%rsi),%rsi
+       subq    $32,%rdx
+       cmpq    $32,%rdx
+       jae     103200b         /* at least 32 bytes left: go around again */
+       cmpb    $0,%dl          /* fewer than 32 bytes left, so %dl holds the whole remainder */
+       jne     10b             /* 1..31 bytes left: re-dispatch on the tail */
+       POP_FRAME_POINTER       /* nothing left: buffers are equal, %eax is 0 */
+       ret
+
+/*
+ * A mismatch was found.
+ *
+ * Before computing the result, narrow down the range (16 -> 8 -> 4 bytes).
+ */
+ALIGN_TEXT
+10320016:      /* mismatch in bytes 16..31 of the current 32-byte chunk */
+       leaq    16(%rdi),%rdi
+       leaq    16(%rsi),%rsi
+10320000:      /* mismatch within the 16 bytes at (%rdi): pick the 8-byte half */
+       movq    (%rdi),%r8
+       movq    (%rsi),%r9
+       cmpq    %r8,%r9
+       jne     80f
        leaq    8(%rdi),%rdi
        leaq    8(%rsi),%rsi
-       subq    $8,%rdx
-       cmpq    $8,%rdx
-       jae     6b
-       jl      1b
-       jmp     3b
-7:
-       /*
-        * 32 bytes
-        */
-       movq    (%rsi),%r8
-       movq    8(%rsi),%r9
-       subq    (%rdi),%r8
-       subq    8(%rdi),%r9
-       or      %r8,%r9
-       jnz     1b
+       jmp     80f
+ALIGN_TEXT
+10081608:      /* mismatch in the last 8 bytes */
+10163224:
+       leaq    -8(%rdi,%rdx),%rdi
+       leaq    -8(%rsi,%rdx),%rsi
+       jmp     80f
+ALIGN_TEXT
+10163216:      /* mismatch in the 8 bytes starting 16 bytes before the end */
+       leaq    -16(%rdi,%rdx),%rdi
+       leaq    -16(%rsi,%rdx),%rsi
+       jmp     80f
+ALIGN_TEXT
+10163208:      /* mismatch in bytes 8..15 */
+       leaq    8(%rdi),%rdi
+       leaq    8(%rsi),%rsi
+       jmp     80f
+ALIGN_TEXT
+10040804:      /* mismatch in the last 4 bytes */
+       leaq    -4(%rdi,%rdx),%rdi
+       leaq    -4(%rsi,%rdx),%rsi
+       jmp     1f              /* already narrowed to 4 bytes */
 
-       movq    16(%rsi),%r8
-       movq    24(%rsi),%r9
-       subq    16(%rdi),%r8
-       subq    24(%rdi),%r9
-       or      %r8,%r9
-       jnz     1b
+ALIGN_TEXT
+80:    /* mismatch within the 8 bytes at (%rdi): pick the 4-byte half */
+       movl    (%rdi),%r8d
+       movl    (%rsi),%r9d
+       cmpl    %r8d,%r9d
+       jne     1f
+       leaq    4(%rdi),%rdi
+       leaq    4(%rsi),%rsi
 
-       leaq    32(%rdi),%rdi
-       leaq    32(%rsi),%rsi
-       subq    $32,%rdx
-       cmpq    $32,%rdx
-       jae     7b
-       jnz     1b
-       jmp     3b
+/*
+ * The mismatch is within the next (up to) 4 bytes; find the first differing byte.
+ */
+1:
+       movzbl  (%rdi),%eax
+       movzbl  (%rsi),%r8d
+       cmpb    %r8b,%al
+       jne     2f
+
+       movzbl  1(%rdi),%eax
+       movzbl  1(%rsi),%r8d
+       cmpb    %r8b,%al
+       jne     2f
+
+       movzbl  2(%rdi),%eax
+       movzbl  2(%rsi),%r8d
+       cmpb    %r8b,%al
+       jne     2f
+
+       movzbl  3(%rdi),%eax    /* last candidate: loaded without a compare */
+       movzbl  3(%rsi),%r8d
+2:     /* %eax and %r8d hold the zero-extended bytes at the mismatch */
+       subl    %r8d,%eax       /* return value: their difference */
+       POP_FRAME_POINTER
+       ret
 END(memcmp)
 
 /*
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to