https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=43743ed754727a0ab5bbe9b15068d3256791f011

commit 43743ed754727a0ab5bbe9b15068d3256791f011
Author:     Corinna Vinschen <[email protected]>
AuthorDate: Tue Dec 20 10:13:38 2022 +0100
Commit:     Corinna Vinschen <[email protected]>
CommitDate: Tue Dec 20 10:13:59 2022 +0100

    Cygwin: x86_64: import latest NetBSD bcopy.S
    
    Tweak slightly to allow implementing the entire {w}mem{p}{cpy,move}
    family:
    
    Add WIDE macro processing for wmem* and POST macro processing for
    memp* functions.
    
    Signed-off-by: Corinna Vinschen <[email protected]>

Diff:
---
 winsup/cygwin/Makefile.am       |   8 +-
 winsup/cygwin/x86_64/bcopy.S    | 192 ++++++++++++++++++++++++++++++++++++++++
 winsup/cygwin/x86_64/memcpy.S   | 133 +---------------------------
 winsup/cygwin/x86_64/memmove.S  |   4 +
 winsup/cygwin/x86_64/mempcpy.S  |   5 ++
 winsup/cygwin/x86_64/wmemcpy.S  |   5 ++
 winsup/cygwin/x86_64/wmemmove.S |   5 ++
 winsup/cygwin/x86_64/wmempcpy.S |   6 ++
 8 files changed, 227 insertions(+), 131 deletions(-)
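
[Editor's note, not part of the commit: as a minimal sketch of the semantics the
WIDE and POST macros select — WIDE copies in wchar_t units, POST makes the
function return dst + n — the following hypothetical C test illustrates the
expected behavior of the generated family. mempcpy/wmempcpy are extensions, so
_GNU_SOURCE is assumed here.]

/* Hypothetical illustration only: expected return values and element sizes
   for the {w}mem{p}{cpy,move} family built from bcopy.S. */
#define _GNU_SOURCE 1
#include <assert.h>
#include <string.h>
#include <wchar.h>

int main (void)
{
  char buf[16];
  wchar_t wbuf[8];

  /* memcpy/memmove return dst; mempcpy (POST) returns dst + n bytes. */
  assert (memcpy (buf, "abcdefgh", 8) == buf);
  assert (memmove (buf, "abcdefgh", 8) == buf);
  assert (mempcpy (buf, "abcdefgh", 8) == buf + 8);

  /* wmem* (WIDE) count in wchar_t units, so 4 elements copy
     4 * sizeof (wchar_t) bytes; wmempcpy returns dst + n elements. */
  assert (wmemcpy (wbuf, L"wxyz", 4) == wbuf);
  assert (wmemmove (wbuf, L"wxyz", 4) == wbuf);
  assert (wmempcpy (wbuf, L"wxyz", 4) == wbuf + 4);

  return 0;
}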

diff --git a/winsup/cygwin/Makefile.am b/winsup/cygwin/Makefile.am
index f63e8959141b..f8c249f527e7 100644
--- a/winsup/cygwin/Makefile.am
+++ b/winsup/cygwin/Makefile.am
@@ -52,9 +52,15 @@ TEST_LIB_NAME=libcygwin0.a
 # These objects are included directly into the import library
 if TARGET_X86_64
 TARGET_FILES= \
+       x86_64/bcopy.S \
        x86_64/memchr.S \
        x86_64/memcpy.S \
-       x86_64/memset.S
+       x86_64/memmove.S \
+       x86_64/mempcpy.S \
+       x86_64/memset.S \
+       x86_64/wmemcpy.S \
+       x86_64/wmemmove.S \
+       x86_64/wmempcpy.S
 endif
 
 LIB_FILES= \
diff --git a/winsup/cygwin/x86_64/bcopy.S b/winsup/cygwin/x86_64/bcopy.S
new file mode 100644
index 000000000000..84dba1223e25
--- /dev/null
+++ b/winsup/cygwin/x86_64/bcopy.S
@@ -0,0 +1,192 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from locore.s.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+#if defined(LIBC_SCCS)
+       RCSID("$NetBSD: bcopy.S,v 1.5 2014/03/22 19:16:34 jakllsch Exp $")
+#endif
+
+       /*
+        * (ov)bcopy (src,dst,cnt)
+        *  [email protected]     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+        *
+        * Hacked about by [email protected]
+        */
+
+#ifdef MEMCOPY
+#ifdef WIDE
+#ifdef POST
+ENTRY3(wmempcpy)
+#else
+ENTRY3(wmemcpy)
+#endif
+#else
+#ifdef POST
+ENTRY3(mempcpy)
+#else
+ENTRY3(memcpy)
+#endif
+#endif
+#define NO_OVERLAP
+#else
+#ifdef MEMMOVE
+#ifdef WIDE
+ENTRY3(wmemmove)
+#else
+ENTRY3(memmove)
+#endif
+#else
+ENTRY3(bcopy)
+#endif
+#endif
+#ifdef WIDE
+       shlq    $1,%rdx         /* cnt * sizeof (wchar_t) */
+#endif
+       movq    %rdx,%rcx
+#if defined(MEMCOPY) || defined(MEMMOVE)
+       movq    %rdi,%rax       /* must return destination address */
+#ifdef POST
+       addq    %rdx,%rax       /* + n */
+#endif
+       mov     %rdi,%r11       /* for misaligned check */
+#else
+       mov     %rsi,%r11       /* for misaligned check */
+       xchgq   %rdi,%rsi       /* bcopy() has arg order reversed */
+#endif
+
+#if !defined(NO_OVERLAP)
+       movq    %rdi,%r8
+       subq    %rsi,%r8
+#endif
+
+       shrq    $3,%rcx         /* count for copy by words */
+       jz      8f              /* j if less than 8 bytes */
+
+       lea     -8(%rdi,%rdx),%r9       /* target address of last 8 */
+       mov     -8(%rsi,%rdx),%r10      /* get last word */
+#if !defined(NO_OVERLAP)
+       cmpq    %rdx,%r8        /* overlapping? */
+       jb      10f
+#endif
+
+/*
+ * Non-overlaping, copy forwards.
+ * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
+ * if %ecx is more than 76.
+ * AMD might do something similar some day.
+ */
+       and     $7,%r11         /* destination misaligned ? */
+       jnz     2f
+       rep
+       movsq
+       mov     %r10,(%r9)      /* write last word */
+       ret
+
+/*
+ * Destination misaligned
+ * AMD say it is better to align the destination (not the source).
+ * This will also re-align copies if the source and dest are both
+ * misaligned by the same amount)
+ * (I think Nehalem will use its accelerated copy if the source
+ * and destination have the same alignment.)
+ */
+2:
+       lea     -9(%r11,%rdx),%rcx      /* post re-alignment count */
+       neg     %r11                    /* now -1 .. -7 */
+       mov     (%rsi),%rdx             /* get first word */
+       mov     %rdi,%r8                /* target for first word */
+       lea     8(%rsi,%r11),%rsi
+       lea     8(%rdi,%r11),%rdi
+       shr     $3,%rcx
+       rep
+       movsq
+       mov     %rdx,(%r8)              /* write first word */
+       mov     %r10,(%r9)              /* write last word */
+       ret
+
+#if !defined(NO_OVERLAP)
+/* Must copy backwards.
+ * Reverse copy is probably easy to code faster than 'rep movds'
+ * since that requires (IIRC) an extra clock every 3 iterations (AMD).
+ * However I don't suppose anything cares that much!
+ * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
+ * The copy is aligned with the buffer start (more likely to
+ * be a multiple of 8 than the end).
+ */
+10:
+       lea     -8(%rsi,%rcx,8),%rsi
+       lea     -8(%rdi,%rcx,8),%rdi
+       std
+       rep
+       movsq
+       cld
+       mov     %r10,(%r9)      /* write last bytes */
+       ret
+#endif
+
+/* Less than 8 bytes to copy, copy by bytes */
+/* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
+ * For longer transfers it is 50+ !
+ */
+8:     mov     %rdx,%rcx
+
+#if !defined(NO_OVERLAP)
+       cmpq    %rdx,%r8        /* overlapping? */
+       jb      81f
+#endif
+
+       /* nope, copy forwards. */
+       rep
+       movsb
+       ret
+
+#if !defined(NO_OVERLAP)
+/* Must copy backwards */
+81:
+       lea     -1(%rsi,%rcx),%rsi
+       lea     -1(%rdi,%rcx),%rdi
+       std
+       rep
+       movsb
+       cld
+       ret
+#endif
+
+#ifdef MEMCOPY
+END(memcpy)
+#else
+#ifdef MEMMOVE
+END(memmove)
+#else
+END(bcopy)
+#endif
+#endif
diff --git a/winsup/cygwin/x86_64/memcpy.S b/winsup/cygwin/x86_64/memcpy.S
index 4be7a01459f9..a53243b5ff26 100644
--- a/winsup/cygwin/x86_64/memcpy.S
+++ b/winsup/cygwin/x86_64/memcpy.S
@@ -1,131 +1,4 @@
-/* These functions are almost verbatim FreeBSD code (even if the header of
-   one file mentiones NetBSD), just wrapped in the minimum required code to
-   make them work under the MS AMD64 ABI.
-   See FreeBSD src/lib/libc/amd64/string/bcopy.S */
+/*     $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $      */
 
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from locore.s.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of the University nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- */
-
-       .seh_proc _memcpy
-_memcpy:
-       movq    %rsi,8(%rsp)
-       movq    %rdi,16(%rsp)
-       .seh_endprologue
-       movq    %rcx,%rdi
-       movq    %rdx,%rsi
-       movq    %r8,%rdx
-
-       movq    %rdx,%rcx
-       movq    %rdi,%r8
-       subq    %rsi,%r8
-       cmpq    %rcx,%r8        /* overlapping? */
-       jb      1f
-       cld                     /* nope, copy forwards. */
-       shrq    $3,%rcx         /* copy by words */
-       rep movsq
-       movq    %rdx,%rcx
-       andq    $7,%rcx         /* any bytes left? */
-       rep movsb
-       jmp     2f
-1:
-       addq    %rcx,%rdi       /* copy backwards. */
-       addq    %rcx,%rsi
-       std
-       andq    $7,%rcx         /* any fractional bytes? */
-       decq    %rdi
-       decq    %rsi
-       rep movsb
-       movq    %rdx,%rcx       /* copy remainder by words */
-       shrq    $3,%rcx
-       subq    $7,%rsi
-       subq    $7,%rdi
-       rep movsq
-       cld
-2:
-       movq    8(%rsp),%rsi
-       movq    16(%rsp),%rdi
-       ret
-       .seh_endproc
-
-       .globl  memmove
-       .seh_proc memmove
-memmove:
-       .seh_endprologue
-       movq    %rcx,%rax       /* return dst */
-       jmp     _memcpy
-       .seh_endproc
-
-       .globl  memcpy
-       .seh_proc memcpy
-memcpy:
-       .seh_endprologue
-       movq    %rcx,%rax       /* return dst */
-       jmp     _memcpy
-       .seh_endproc
-
-       .globl  mempcpy
-       .seh_proc mempcpy
-mempcpy:
-       .seh_endprologue
-       movq    %rcx,%rax       /* return dst  */
-       addq    %r8,%rax        /*         + n */
-       jmp     _memcpy
-       .seh_endproc
-
-       .globl  wmemmove
-       .seh_proc wmemmove
-wmemmove:
-       .seh_endprologue
-       shlq    $1,%r8          /* cnt * sizeof (wchar_t) */
-       movq    %rcx,%rax       /* return dst */
-       jmp     _memcpy
-       .seh_endproc
-
-       .globl  wmemcpy
-       .seh_proc wmemcpy
-wmemcpy:
-       .seh_endprologue
-       shlq    $1,%r8          /* cnt * sizeof (wchar_t) */
-       movq    %rcx,%rax       /* return dst */
-       jmp     _memcpy
-       .seh_endproc
-
-       .globl  wmempcpy
-       .seh_proc wmempcpy
-wmempcpy:
-       .seh_endprologue
-       shlq    $1,%r8          /* cnt * sizeof (wchar_t) */
-       movq    %rcx,%rax       /* return dst */
-       addq    %r8,%rax        /*         + n */
-       jmp     _memcpy
-       .seh_endproc
+#define MEMCOPY
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/memmove.S b/winsup/cygwin/x86_64/memmove.S
new file mode 100644
index 000000000000..f4b7b08257fa
--- /dev/null
+++ b/winsup/cygwin/x86_64/memmove.S
@@ -0,0 +1,4 @@
+/*     $NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $     */
+
+#define MEMMOVE
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/mempcpy.S b/winsup/cygwin/x86_64/mempcpy.S
new file mode 100644
index 000000000000..7ccb4f1d9c12
--- /dev/null
+++ b/winsup/cygwin/x86_64/mempcpy.S
@@ -0,0 +1,5 @@
+/*     $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $      */
+
+#define MEMCOPY
+#define POST
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/wmemcpy.S b/winsup/cygwin/x86_64/wmemcpy.S
new file mode 100644
index 000000000000..c998ecac58b5
--- /dev/null
+++ b/winsup/cygwin/x86_64/wmemcpy.S
@@ -0,0 +1,5 @@
+/*     $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $      */
+
+#define MEMCOPY
+#define WIDE
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/wmemmove.S b/winsup/cygwin/x86_64/wmemmove.S
new file mode 100644
index 000000000000..e7ee8efeb693
--- /dev/null
+++ b/winsup/cygwin/x86_64/wmemmove.S
@@ -0,0 +1,5 @@
+/*     $NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $     */
+
+#define MEMMOVE
+#define WIDE
+#include "bcopy.S"
diff --git a/winsup/cygwin/x86_64/wmempcpy.S b/winsup/cygwin/x86_64/wmempcpy.S
new file mode 100644
index 000000000000..53f4ceb2e89e
--- /dev/null
+++ b/winsup/cygwin/x86_64/wmempcpy.S
@@ -0,0 +1,6 @@
+/*     $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $      */
+
+#define MEMCOPY
+#define WIDE
+#define POST
+#include "bcopy.S"