I think this is about it for intrinsics work for v3. This patch is
(mostly) for the files in intrincs\*.c that weren't changed by any
previous work. It's possible that not everything in this patch will get
approved, but I figure it's easier to ask forgiveness than permission.
__movsb, __movsd, __movsq, __movsw: Moved to intrin-impl.h.
__rdtsc: Changed to use builtin, moved to intrin-impl.h, resolved conflict
with ia32intrin.h.
_umul128 & _mul128: Moved to intrin-impl.h.
__shiftright128 & __shiftleft128: Re-written as asm, moved to intrin-impl.h.
_lrotr, _lrotl: Fix bug caused by ia32intrin.h when longs are 4 bytes long.
RtlSecureZeroMemory - According to MSDN, this is not an intrinsic and
should only be defined in winnt.h. *File deleted from intrincs.*
UnsignedMultiplyExtract128 & MultiplyExtract128: According to MSDN,
these are not intrinsics. Also, MultiplyExtract128 doesn't work right.
*Files deleted from intrincs* and code fixed in winnt.h.
_InterlockedAdd & _InterlockedAdd64: According to MSDN, these intrinsics
are only available for Itanium. I'm not sure the inline asm we have
will run properly there, and there are no #if's around it to limit it to
that platform. Note that winnt.h has inlines for x86/x64 for these.
*Files deleted from intrincs*.
dw
Index: mingw-w64-crt/intrincs/__movsb.c
===================================================================
--- mingw-w64-crt/intrincs/__movsb.c (revision 6023)
+++ mingw-w64-crt/intrincs/__movsb.c (working copy)
@@ -1,12 +1,10 @@
-#include <intrin.h>
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
-void __movsb(unsigned char *Destination, unsigned char const *Source, size_t Count)
-{
- __asm__ __volatile__
- (
- "rep; movsb" :
- [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
- "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
- );
-}
+#define __INTRINSIC_ONLYSPECIAL
+#define __INTRINSIC_SPECIAL___movsb /* Causes code generation in intrin-impl.h */
+#include <intrin.h>
Index: mingw-w64-crt/intrincs/__movsd.c
===================================================================
--- mingw-w64-crt/intrincs/__movsd.c (revision 6023)
+++ mingw-w64-crt/intrincs/__movsd.c (working copy)
@@ -1,12 +1,10 @@
-#include <intrin.h>
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
-void __movsd(unsigned __LONG32 *Dest, unsigned __LONG32 const *Source, size_t Count)
-{
- __asm__ __volatile__
- (
- "rep; movsd" :
- [Dest] "=D" (Dest), [Source] "=S" (Source), [Count] "=c" (Count) :
- "[Dest]" (Dest), "[Source]" (Source), "[Count]" (Count)
- );
-}
+#define __INTRINSIC_ONLYSPECIAL
+#define __INTRINSIC_SPECIAL___movsd /* Causes code generation in intrin-impl.h */
+#include <intrin.h>
Index: mingw-w64-crt/intrincs/__movsq.c
===================================================================
--- mingw-w64-crt/intrincs/__movsq.c (revision 6023)
+++ mingw-w64-crt/intrincs/__movsq.c (working copy)
@@ -1,12 +1,10 @@
-#include <intrin.h>
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
-void __movsq(unsigned long long *Dest, unsigned long long const *Source, size_t Count)
-{
- __asm__ __volatile__
- (
- "rep; movsq" :
- [Dest] "=D" (Dest), [Source] "=S" (Source), [Count] "=c" (Count) :
- "[Dest]" (Dest), "[Source]" (Source), "[Count]" (Count)
- );
-}
+#define __INTRINSIC_ONLYSPECIAL
+#define __INTRINSIC_SPECIAL___movsq /* Causes code generation in intrin-impl.h */
+#include <intrin.h>
Index: mingw-w64-crt/intrincs/__movsw.c
===================================================================
--- mingw-w64-crt/intrincs/__movsw.c (revision 6023)
+++ mingw-w64-crt/intrincs/__movsw.c (working copy)
@@ -1,12 +1,10 @@
-#include <intrin.h>
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
-void __movsw(unsigned short *Dest, unsigned short const *Source, size_t Count)
-{
- __asm__ __volatile__
- (
- "rep; movsw" :
- [Dest] "=D" (Dest), [Source] "=S" (Source), [Count] "=c" (Count) :
- "[Dest]" (Dest), "[Source]" (Source), "[Count]" (Count)
- );
-}
+#define __INTRINSIC_ONLYSPECIAL
+#define __INTRINSIC_SPECIAL___movsw /* Causes code generation in intrin-impl.h */
+#include <intrin.h>
Index: mingw-w64-crt/intrincs/__shiftleft128.c
===================================================================
--- mingw-w64-crt/intrincs/__shiftleft128.c (revision 6023)
+++ mingw-w64-crt/intrincs/__shiftleft128.c (working copy)
@@ -1,27 +1,10 @@
-#include <_mingw.h>
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
-#ifdef _WIN64
+#define __INTRINSIC_ONLYSPECIAL
+#define __INTRINSIC_SPECIAL___shiftleft128 /* Causes code generation in intrin-impl.h */
-unsigned __int64 __shiftleft128 (unsigned __int64 LowPart,
- unsigned __int64 HighPart, unsigned char Shift);
-
-unsigned __int64 __shiftleft128 (unsigned __int64 LowPart,
- unsigned __int64 HighPart, unsigned char Shift)
-{
- if (Shift >= 128)
- return 0ULL;
- if (!Shift)
- return HighPart;
- if (Shift >= 64)
- {
- HighPart = LowPart;
- Shift -= 64;
- LowPart = 0;
- }
- HighPart <<= Shift;
- LowPart >>= (64 - Shift);
- return (HighPart | LowPart);
-}
-
-#endif
-
+#include <intrin.h>
Index: mingw-w64-crt/intrincs/__shiftright128.c
===================================================================
--- mingw-w64-crt/intrincs/__shiftright128.c (revision 6023)
+++ mingw-w64-crt/intrincs/__shiftright128.c (working copy)
@@ -1,27 +1,10 @@
-#include <_mingw.h>
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
-#ifdef _WIN64
+#define __INTRINSIC_ONLYSPECIAL
+#define __INTRINSIC_SPECIAL___shiftright128 /* Causes code generation in intrin-impl.h */
-unsigned __int64 __shiftright128 (unsigned __int64 LowPart,
- unsigned __int64 HighPart, unsigned char Shift);
-
-unsigned __int64 __shiftright128 (unsigned __int64 LowPart,
- unsigned __int64 HighPart, unsigned char Shift)
-{
- if (Shift >= 128)
- return 0ULL;
- if (!Shift)
- return LowPart;
- if (Shift >= 64)
- {
- LowPart = HighPart;
- Shift -= 64;
- HighPart = 0;
- }
- LowPart >>= Shift;
- HighPart <<= (64 - Shift);
- return (HighPart | LowPart);
-}
-
-#endif
-
+#include <intrin.h>
Index: mingw-w64-crt/intrincs/_mul128.c
===================================================================
--- mingw-w64-crt/intrincs/_mul128.c (revision 6023)
+++ mingw-w64-crt/intrincs/_mul128.c (working copy)
@@ -1,22 +1,10 @@
-#include <_mingw.h>
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
-#ifdef _WIN64
-#ifndef __SIZEOF_INT128__
-typedef signed int __xint128 __attribute__ ((__mode__ (TI)));
-#else
-typedef signed __int128 __xint128;
-#endif
+#define __INTRINSIC_ONLYSPECIAL
+#define __INTRINSIC_SPECIAL__mul128 /* Causes code generation in intrin-impl.h */
-__int64 _mul128(__int64, __int64, __int64 *);
-
-__int64 _mul128(__int64 a, __int64 b, __int64 *hi)
-{
- union { __xint128 v; __int64 sv[2]; } var;
- var.v = ((__xint128) a) * ((__xint128) b);
- if (hi) *hi = var.sv[1];
- return var.sv[0];
-}
-
-#endif /* _WIN64 */
-
-
+#include <intrin.h>
Index: mingw-w64-crt/intrincs/_umul128.c
===================================================================
--- mingw-w64-crt/intrincs/_umul128.c (revision 6023)
+++ mingw-w64-crt/intrincs/_umul128.c (working copy)
@@ -1,22 +1,10 @@
-#include <_mingw.h>
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
-#ifdef _WIN64
-#ifndef __SIZEOF_INT128__
-typedef unsigned int __uint128 __attribute__ ((__mode__ (TI)));
-#else
-typedef unsigned __int128 __uint128;
-#endif
+#define __INTRINSIC_ONLYSPECIAL
+#define __INTRINSIC_SPECIAL__umul128 /* Causes code generation in intrin-impl.h */
-unsigned __int64 _umul128(unsigned __int64, unsigned __int64, unsigned __int64 *);
-
-unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, unsigned __int64 *hi)
-{
- union { __uint128 v; unsigned __int64 sv[2]; } var;
- var.v = ((__uint128) a) * ((__uint128) b);
- if (hi) *hi = var.sv[1];
- return var.sv[0];
-}
-
-#endif /* _WIN64 */
-
-
+#include <intrin.h>
Index: mingw-w64-crt/intrincs/ilockadd.c
===================================================================
--- mingw-w64-crt/intrincs/ilockadd.c (revision 6023)
+++ mingw-w64-crt/intrincs/ilockadd.c (working copy)
@@ -1,25 +0,0 @@
-#include <intrin.h>
-
-__LONG32 _InterlockedAdd(__LONG32 volatile *, __LONG32); /* not in intrin.h */
-__LONG32 _InterlockedAdd(__LONG32 volatile *Addend, __LONG32 Value)
-{
-/* return InterlockedExchangeAdd(Addend,Value) + Value; */
- __LONG32 ret;
- __asm__ __volatile__ ("lock\n\t"
- "xaddl %0,(%1)"
- : "=r" (ret)
- : "r" (Addend), "0" (Value)
- : "memory");
- return ret + Value;
-}
-
-#ifdef _WIN64
-__LONG32 InterlockedAdd(__LONG32 volatile *, __LONG32) __attribute__((alias("_InterlockedAdd")));
-#else
-__LONG32 __stdcall InterlockedAdd(__LONG32 volatile *Addend, __LONG32 Value);
-__LONG32 __stdcall InterlockedAdd(__LONG32 volatile *Addend, __LONG32 Value)
-{
- return _InterlockedAdd(Addend, Value);
-}
-#endif
-
Index: mingw-w64-crt/intrincs/ilockadd64.c
===================================================================
--- mingw-w64-crt/intrincs/ilockadd64.c (revision 6023)
+++ mingw-w64-crt/intrincs/ilockadd64.c (working copy)
@@ -1,17 +0,0 @@
-#include <intrin.h>
-
-__int64 _InterlockedAdd64(__int64 volatile *, __int64); /* not in intrin.h */
-__int64 _InterlockedAdd64(__int64 volatile *Addend, __int64 Value)
-{
-/* return InterlockedExchangeAdd64(Addend,Value) + Value; */
- __int64 ret;
- __asm__ __volatile__ ("lock\n\t"
- "xaddq %0,(%1)"
- : "=r" (ret)
- : "r" (Addend), "0" (Value)
- : "memory");
- return ret + Value;
-}
-
-__int64 InterlockedAdd64(__int64 volatile *, __int64) __attribute__((alias("_InterlockedAdd64")));
-
Index: mingw-w64-crt/intrincs/mul128ex.c
===================================================================
--- mingw-w64-crt/intrincs/mul128ex.c (revision 6023)
+++ mingw-w64-crt/intrincs/mul128ex.c (working copy)
@@ -1,17 +0,0 @@
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN
-#endif
-#define __CRT__NO_INLINE
-#include <windows.h>
-
-/* for __x86_64 only */
-
-LONG64 MultiplyExtract128(LONG64 Multiplier,LONG64 Multiplicand,BYTE Shift) {
- LONG64 extractedProduct;
- LONG64 highProduct;
- LONG64 lowProduct;
- lowProduct = Multiply128(Multiplier,Multiplicand,&highProduct);
- extractedProduct = (LONG64)ShiftRight128((LONG64)lowProduct,(LONG64)highProduct,Shift);
- return extractedProduct;
-}
-
Index: mingw-w64-crt/intrincs/rdtsc.c
===================================================================
--- mingw-w64-crt/intrincs/rdtsc.c (revision 6023)
+++ mingw-w64-crt/intrincs/rdtsc.c (working copy)
@@ -4,18 +4,13 @@
* No warranty is given; refer to the file DISCLAIMER.PD within this package.
*/
+/* This function is duplicated here since it would conflict
+ with the definition in ia32intrin.h */
+
#include <intrin.h>
unsigned __int64 __rdtsc(void)
{
-#ifdef _WIN64
- unsigned __int64 val1, val2;
-#else
- unsigned int val1, val2;
-#endif
- __asm__ __volatile__ (
- "rdtsc"
- : "=a" (val1), "=d" (val2));
- return ((unsigned __int64)val1) | (((unsigned __int64)val2) << 32);
+ return __builtin_ia32_rdtsc();
}
Index: mingw-w64-crt/intrincs/RtlSecureZeroMemory.c
===================================================================
--- mingw-w64-crt/intrincs/RtlSecureZeroMemory.c (revision 6023)
+++ mingw-w64-crt/intrincs/RtlSecureZeroMemory.c (working copy)
@@ -1,17 +0,0 @@
-#define __CRT__NO_INLINE 1
-#include <windows.h>
-
-PVOID WINAPI RtlSecureZeroMemory(PVOID ptr,SIZE_T cnt)
-{
- volatile char *vptr = (volatile char *)ptr;
-#ifdef __x86_64
- __stosb ((PBYTE)((DWORD64)vptr),0,cnt);
-#else
- while (cnt != 0)
- {
- *vptr++ = 0;
- cnt--;
- }
-#endif /* __x86_64 */
- return ptr;
-}
Index: mingw-w64-crt/intrincs/umul128ex.c
===================================================================
--- mingw-w64-crt/intrincs/umul128ex.c (revision 6023)
+++ mingw-w64-crt/intrincs/umul128ex.c (working copy)
@@ -1,17 +0,0 @@
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN
-#endif
-#define __CRT__NO_INLINE
-#include <windows.h>
-
-/* for __x86_64 only */
-
-DWORD64 UnsignedMultiplyExtract128(DWORD64 Multiplier,DWORD64 Multiplicand,BYTE Shift) {
- DWORD64 extractedProduct;
- DWORD64 highProduct;
- DWORD64 lowProduct;
- lowProduct = UnsignedMultiply128(Multiplier,Multiplicand,&highProduct);
- extractedProduct = ShiftRight128(lowProduct,highProduct,Shift);
- return extractedProduct;
-}
-
Index: mingw-w64-crt/Makefile.am
===================================================================
--- mingw-w64-crt/Makefile.am (revision 6023)
+++ mingw-w64-crt/Makefile.am (working copy)
@@ -263,7 +263,7 @@
intrincs/__stosw.c intrincs/_rotl64.c intrincs/_rotr64.c intrincs/bitscanfwd.c intrincs/bitscanrev.c \
intrincs/bittest.c intrincs/bittestc.c intrincs/bittestci.c intrincs/bittestr.c intrincs/bittestri.c \
intrincs/bittests.c intrincs/bittestsi.c intrincs/cpuid.c \
- intrincs/ilockadd.c intrincs/ilockand.c intrincs/ilockand64.c \
+ intrincs/ilockand.c intrincs/ilockand64.c \
intrincs/ilockcxch.c \
intrincs/ilockcxch16.c intrincs/ilockcxch64.c intrincs/ilockcxchptr.c intrincs/ilockdec.c intrincs/ilockdec16.c \
intrincs/ilockdec64.c intrincs/ilockexch.c intrincs/ilockexch64.c intrincs/ilockexchadd.c intrincs/ilockexchadd64.c \
@@ -273,15 +273,15 @@
intrincs/outbytestring.c intrincs/outdword.c intrincs/outdwordstring.c intrincs/outword.c intrincs/outwordstring.c \
intrincs/rdtsc.c intrincs/readcr0.c intrincs/readcr2.c intrincs/readcr3.c intrincs/readcr4.c \
intrincs/readcr8.c intrincs/readmsr.c intrincs/writecr0.c intrincs/writecr2.c intrincs/writecr3.c \
- intrincs/writecr4.c intrincs/writecr8.c intrincs/writemsr.c intrincs/__int2c.c intrincs/RtlSecureZeroMemory.c
+ intrincs/writecr4.c intrincs/writecr8.c intrincs/writemsr.c intrincs/__int2c.c
# these only go into the 64 bit version:
src_intrincs64=\
intrincs/bittest64.c intrincs/bittestc64.c intrincs/bittestr64.c intrincs/bittestri64.c \
intrincs/bittests64.c intrincs/bittestsi64.c intrincs/bitscanfwd64.c intrincs/bitscanrev64.c \
- intrincs/ilockadd64.c intrincs/readgsbyte.c intrincs/readgsword.c intrincs/readgsdword.c \
+ intrincs/readgsbyte.c intrincs/readgsword.c intrincs/readgsdword.c \
intrincs/readgsqword.c intrincs/writegsbyte.c intrincs/writegsword.c intrincs/writegsdword.c \
- intrincs/writegsqword.c intrincs/mul128ex.c intrincs/umul128ex.c intrincs/_mul128.c \
+ intrincs/writegsqword.c intrincs/_mul128.c \
intrincs/_umul128.c intrincs/__movsq.c intrincs/__stosq.c intrincs/__shiftright128.c \
intrincs/bittestci64.c intrincs/__faststorefence.c intrincs/__shiftleft128.c
Index: mingw-w64-headers/crt/intrin.h
===================================================================
--- mingw-w64-headers/crt/intrin.h (revision 6023)
+++ mingw-w64-headers/crt/intrin.h (working copy)
@@ -20,13 +20,13 @@
is included by intrin.h, as well as various platform sdk headers.
- Including intrin.h will create definitions/implementations for all available MSVC intrinsics.
- Including various platforms sdk headers will only include the intrinsics defined in that
- header. As of this writing, only winnt.h uses this approach.
+ header. As of this writing, only winnt.h and winbase.h use this approach.
- If an application defines its own prototypes for intrinsics (ie without including any
platform header or intrin.h), the symbols will be resolved from the library. Since this
will likely result in the code being invoked via 'call', performance may be degraded.
If you wish to implement intrinsic functions that are defined in intrin.h but are not
- yet implemented in mingw, see the comments at the top of intrin-impl.h.
+ yet implemented in mingw-w64, see the comments at the top of intrin-impl.h.
*/
#ifndef __INTRIN_H_
@@ -38,7 +38,6 @@
#include <setjmp.h>
#endif
#include <stddef.h>
-#include <psdk_inc/intrin-impl.h>
#if defined(__GNUC__) && \
(defined(__i386__) || defined(__x86_64__))
@@ -58,12 +57,24 @@
#include <x86intrin.h>
+/* As of gcc 4.8.1, x86intrin doesn't correctly define _lrotr/_lrotl if longs are 4 bytes */
+#if __SIZEOF_LONG__ == 4
+#undef _lrotl
+#undef _lrotr
+
+#define _lrotl(a,b) __rold((a), (b))
+#define _lrotr(a,b) __rord((a), (b))
+
+#endif
+
#if defined(__cplusplus)
}
#endif
#endif
+#include <psdk_inc/intrin-impl.h>
+
#ifndef __MMX__
typedef union __m64 { char v[7]; } __m64;
#endif
@@ -395,13 +406,11 @@
#undef _lrotl
#pragma push_macro ("_lrotr")
#undef _lrotr
-#ifdef __x86_64__
- __MACHINE(__MINGW_EXTENSION unsigned long long __cdecl _lrotl(unsigned long long,int))
- __MACHINE(__MINGW_EXTENSION unsigned long long __cdecl _lrotr(unsigned long long,int))
-#else
- __MACHINE(unsigned __LONG32 __cdecl _lrotl(unsigned __LONG32,int))
- __MACHINE(unsigned __LONG32 __cdecl _lrotr(unsigned __LONG32,int))
-#endif
+ /* Note that we are deliberately NOT using __LONG32 here.
+ These signatures will change depending on the compiler's definition
+ of "long." For size-specific rotates, look at _rotl and _rotl64 */
+ __MACHINE(unsigned long __cdecl _lrotl(unsigned long,int))
+ __MACHINE(unsigned long __cdecl _lrotr(unsigned long,int))
#pragma pop_macro ("_lrotl")
#pragma pop_macro ("_lrotr")
@@ -1017,13 +1026,13 @@
__MACHINEI(void __invlpg(void*))
/* __MACHINEI(__MINGW_EXTENSION unsigned __int64 __readmsr(unsigned __LONG32)) moved to psdk_inc/intrin-impl.h */
/* __MACHINEI(__MINGW_EXTENSION void __writemsr(unsigned __LONG32,unsigned __int64)) moved to psdk_inc/intrin-impl.h */
-#ifndef __GNUC__
- __MACHINEI(__MINGW_EXTENSION unsigned __int64 __rdtsc(void))
-#endif
- __MACHINEI(void __movsb(unsigned char *,unsigned char const *,size_t))
- __MACHINEI(void __movsw(unsigned short *,unsigned short const *,size_t))
- __MACHINEI(void __movsd(unsigned __LONG32 *,unsigned __LONG32 const *,size_t))
- __MACHINEX64(__MINGW_EXTENSION void __movsq(unsigned long long *,unsigned long long const *,size_t))
+/* #ifndef __GNUC__ */
+ /* __MACHINEI(__MINGW_EXTENSION unsigned __int64 __rdtsc(void)) moved to psdk_inc/intrin-impl.h */
+/* #endif */
+ /* __MACHINEI(void __movsb(unsigned char *,unsigned char const *,size_t)) moved to psdk_inc/intrin-impl.h */
+ /* __MACHINEI(void __movsw(unsigned short *,unsigned short const *,size_t)) moved to psdk_inc/intrin-impl.h */
+ /* __MACHINEI(void __movsd(unsigned __LONG32 *,unsigned __LONG32 const *,size_t)) moved to psdk_inc/intrin-impl.h */
+ /* __MACHINEX64(__MINGW_EXTENSION void __movsq(unsigned long long *,unsigned long long const *,size_t)) moved to psdk_inc/intrin-impl.h */
/* __MACHINEX64(unsigned char __readgsbyte(unsigned __LONG32 Offset)) moved to psdk_inc/intrin-impl.h */
/* __MACHINEX64(unsigned short __readgsword(unsigned __LONG32 Offset)) moved to psdk_inc/intrin-impl.h */
/* __MACHINEX64(unsigned __LONG32 __readgsdword(unsigned __LONG32 Offset)) moved to psdk_inc/intrin-impl.h */
@@ -1102,10 +1111,10 @@
/* __MACHINEW64(__MINGW_EXTENSION unsigned char _BitScanForward64(unsigned __LONG32 *Index,unsigned __int64 Mask)) moved to psdk_inc/intrin-impl.h */
/* __MACHINEW64(__MINGW_EXTENSION unsigned char _BitScanReverse64(unsigned __LONG32 *Index,unsigned __int64 Mask)) moved to psdk_inc/intrin-impl.h */
__MACHINEIW64(_CRTIMP wchar_t *__cdecl _wcsset(wchar_t *,wchar_t))
- __MACHINEW64(__MINGW_EXTENSION unsigned __int64 __shiftleft128(unsigned __int64 LowPart,unsigned __int64 HighPart,unsigned char Shift))
- __MACHINEW64(__MINGW_EXTENSION unsigned __int64 __shiftright128(unsigned __int64 LowPart,unsigned __int64 HighPart,unsigned char Shift))
- __MACHINEW64(__MINGW_EXTENSION unsigned __int64 _umul128(unsigned __int64 multiplier,unsigned __int64 multiplicand,unsigned __int64 *highproduct))
- __MACHINEW64(__MINGW_EXTENSION __int64 _mul128(__int64 multiplier,__int64 multiplicand,__int64 *highproduct))
+ /* __MACHINEW64(__MINGW_EXTENSION unsigned __int64 __shiftleft128(unsigned __int64 LowPart,unsigned __int64 HighPart,unsigned char Shift)) moved to psdk_inc/intrin-impl.h */
+ /* __MACHINEW64(__MINGW_EXTENSION unsigned __int64 __shiftright128(unsigned __int64 LowPart,unsigned __int64 HighPart,unsigned char Shift)) moved to psdk_inc/intrin-impl.h */
+ /* __MACHINEW64(__MINGW_EXTENSION unsigned __int64 _umul128(unsigned __int64 multiplier,unsigned __int64 multiplicand,unsigned __int64 *highproduct)) moved to psdk_inc/intrin-impl.h */
+ /* __MACHINEW64(__MINGW_EXTENSION __int64 _mul128(__int64 multiplier,__int64 multiplicand,__int64 *highproduct)) moved to psdk_inc/intrin-impl.h */
/* __MACHINEI(void __int2c(void)) moved to psdk_inc/intrin-impl.h */
/* __MACHINEIW64(void _ReadBarrier(void)) moved to psdk_inc/intrin-impl.h */
__MACHINEIW64(unsigned char _rotr8(unsigned char value,unsigned char shift))
Index: mingw-w64-headers/crt/stdlib.h
===================================================================
--- mingw-w64-headers/crt/stdlib.h (revision 6023)
+++ mingw-w64-headers/crt/stdlib.h (working copy)
@@ -528,13 +528,11 @@
#pragma push_macro ("_lrotl")
#undef _lrotr
#undef _lrotl
-#ifdef __x86_64__
- __MINGW_EXTENSION unsigned long long __cdecl _lrotl(unsigned long long _Val,int _Shift);
- __MINGW_EXTENSION unsigned long long __cdecl _lrotr(unsigned long long _Val,int _Shift);
-#else
+ /* Note that we are deliberately NOT using __LONG32 here.
+ These signatures will change depending on the compiler's definition
+ of "long." For size-specific rotates, look at _rotl and _rotl64 */
unsigned long __cdecl _lrotl(unsigned long _Val,int _Shift);
unsigned long __cdecl _lrotr(unsigned long _Val,int _Shift);
-#endif
#pragma pop_macro ("_lrotl")
#pragma pop_macro ("_lrotr")
Index: mingw-w64-headers/include/psdk_inc/intrin-impl.h
===================================================================
--- mingw-w64-headers/include/psdk_inc/intrin-impl.h (revision 6023)
+++ mingw-w64-headers/include/psdk_inc/intrin-impl.h (working copy)
@@ -70,13 +70,14 @@
/* Parameters: (FunctionName, DataType, Operator)
FunctionName: Any valid function name
- DataType: BYTE, WORD, DWORD or DWORD64 */
+ DataType: BYTE, WORD, DWORD or DWORD64
+ InstructionSizeIntel: b, w, d, q (not b,w,l,q) */
/* While we don't need the output values for Dest or Count, we
must still inform the compiler the asm changes them. */
-#define __buildstos(x, y) void x(y *Dest, y Data, size_t Count) \
+#define __buildstos(x, y, z) void x(y *Dest, y Data, size_t Count) \
{ \
- __asm__ __volatile__ ("rep stos%z[Data]" \
+ __asm__ __volatile__ ("rep stos{%z[Data]|" z "}" \
: "+D" (Dest), "+c" (Count) \
: [Data] "a" (Data) \
: "memory"); \
@@ -305,6 +306,23 @@
: "memory"); \
}
+/* This macro is used by __movsb, __movsd, __movsq, __movsw
+
+Parameters: (FunctionName, DataType, InstructionSize)
+ FunctionName: Any valid function name
+ DataType: unsigned char, unsigned short, unsigned __LONG32, unsigned __int64
+ InstructionSize: b, w, d, q
+
+ */
+#define __buildmov(x, y, z) void x(y *Destination, y const *Source, size_t Count) \
+{ \
+ __asm__ __volatile__ ( \
+ "rep movs" z \
+ : "=D" (Destination), "=S" (Source), "=c" (Count) \
+ : "0" (Destination), "1" (Source), "2" (Count) \
+ : "memory"); \
+}
+
#endif /* _INTRIN_MAC_ */
/* The Barrier functions can never be in the library. Since gcc only
@@ -422,6 +440,10 @@
#define __INTRINSIC_SPECIAL__bittestandset64
#define __INTRINSIC_SPECIAL__bittestandreset64
#define __INTRINSIC_SPECIAL__bittestandcomplement64
+#define __INTRINSIC_SPECIAL___movsb
+#define __INTRINSIC_SPECIAL___movsw
+#define __INTRINSIC_SPECIAL___movsd
+#define __INTRINSIC_SPECIAL___movsq
#endif /* __INTRINSIC_GROUP_WINNT */
@@ -473,7 +495,7 @@
#if __INTRINSIC_PROLOG(__stosq)
__MINGW_EXTENSION void __stosq(unsigned __int64 *, unsigned __int64, size_t);
__INTRINSICS_USEINLINE
-__buildstos(__stosq, unsigned __int64)
+__buildstos(__stosq, unsigned __int64, "q")
#define __INTRINSIC_DEFINED___stosq
#endif /* __INTRINSIC_PROLOG */
@@ -737,6 +759,75 @@
#define __INTRINSIC_DEFINED___writecr8
#endif /* __INTRINSIC_PROLOG */
+#if __INTRINSIC_PROLOG(__movsq)
+__MINGW_EXTENSION void __movsq(unsigned __int64 *Dest, unsigned __int64 const *Source, size_t Count);
+__MINGW_EXTENSION __INTRINSICS_USEINLINE
+__buildmov(__movsq, unsigned __int64, "q")
+#define __INTRINSIC_DEFINED___movsq
+#endif /* __INTRINSIC_PROLOG */
+
+#if __INTRINSIC_PROLOG(_umul128)
+unsigned __int64 _umul128(unsigned __int64, unsigned __int64, unsigned __int64 *);
+__INTRINSICS_USEINLINE
+unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, unsigned __int64 *hi)
+{
+ __MINGW_EXTENSION union { unsigned __int128 v; unsigned __int64 sv[2]; } var;
+ var.v = a;
+ var.v *= b;
+ if (hi) *hi = var.sv[1];
+ return var.sv[0];
+}
+#define __INTRINSIC_DEFINED__umul128
+#endif /* __INTRINSIC_PROLOG */
+
+#if __INTRINSIC_PROLOG(_mul128)
+__int64 _mul128(__int64, __int64, __int64 *);
+__INTRINSICS_USEINLINE
+__int64 _mul128(__int64 a, __int64 b, __int64 *hi)
+{
+ __MINGW_EXTENSION union { __int128 v; __int64 sv[2]; } var;
+ var.v = a;
+ var.v *= b;
+ if (hi) *hi = var.sv[1];
+ return var.sv[0];
+}
+#define __INTRINSIC_DEFINED__mul128
+#endif /* __INTRINSIC_PROLOG */
+
+#if __INTRINSIC_PROLOG(__shiftleft128)
+unsigned __int64 __shiftleft128(unsigned __int64 LowPart, unsigned __int64 HighPart, unsigned char Shift);
+__INTRINSICS_USEINLINE
+unsigned __int64 __shiftleft128 (unsigned __int64 LowPart, unsigned __int64 HighPart, unsigned char Shift)
+{
+ unsigned __int64 ret;
+
+ __asm__ ("shld {%[Shift],%[LowPart],%[HighPart]|%[HighPart], %[LowPart], %[Shift]}"
+ : [ret] "=r" (ret)
+ : [LowPart] "r" (LowPart), [HighPart] "0" (HighPart), [Shift] "Jc" (Shift)
+ : "cc");
+
+ return ret;
+}
+#define __INTRINSIC_DEFINED___shiftleft128
+#endif /* __INTRINSIC_PROLOG */
+
+#if __INTRINSIC_PROLOG(__shiftright128)
+unsigned __int64 __shiftright128 (unsigned __int64 LowPart, unsigned __int64 HighPart, unsigned char Shift);
+__INTRINSICS_USEINLINE
+unsigned __int64 __shiftright128 (unsigned __int64 LowPart, unsigned __int64 HighPart, unsigned char Shift)
+{
+ unsigned __int64 ret;
+
+ __asm__ ("shrd {%[Shift],%[HighPart],%[LowPart]|%[LowPart], %[HighPart], %[Shift]}"
+ : [ret] "=r" (ret)
+ : [LowPart] "0" (LowPart), [HighPart] "r" (HighPart), [Shift] "Jc" (Shift)
+ : "cc");
+
+ return ret;
+}
+#define __INTRINSIC_DEFINED___shiftright128
+#endif /* __INTRINSIC_PROLOG */
+
#endif /* __x86_64__ */
/* ***************************************************** */
@@ -755,21 +846,21 @@
#if __INTRINSIC_PROLOG(__stosb)
void __stosb(unsigned char *, unsigned char, size_t);
__INTRINSICS_USEINLINE
-__buildstos(__stosb, unsigned char)
+__buildstos(__stosb, unsigned char, "b")
#define __INTRINSIC_DEFINED___stosb
#endif /* __INTRINSIC_PROLOG */
#if __INTRINSIC_PROLOG(__stosw)
void __stosw(unsigned short *, unsigned short, size_t);
__INTRINSICS_USEINLINE
-__buildstos(__stosw, unsigned short)
+__buildstos(__stosw, unsigned short, "w")
#define __INTRINSIC_DEFINED___stosw
#endif /* __INTRINSIC_PROLOG */
#if __INTRINSIC_PROLOG(__stosd)
void __stosd(unsigned __LONG32 *, unsigned __LONG32, size_t);
__INTRINSICS_USEINLINE
-__buildstos(__stosd, unsigned __LONG32)
+__buildstos(__stosd, unsigned __LONG32, "d")
#define __INTRINSIC_DEFINED___stosd
#endif /* __INTRINSIC_PROLOG */
@@ -1108,6 +1199,40 @@
#define __INTRINSIC_DEFINED___writemsr
#endif /* __INTRINSIC_PROLOG */
+#if __INTRINSIC_PROLOG(__movsb)
+void __movsb(unsigned char *Destination, unsigned char const *Source, size_t Count);
+__INTRINSICS_USEINLINE
+__buildmov(__movsb, unsigned char, "b")
+#define __INTRINSIC_DEFINED___movsb
+#endif /* __INTRINSIC_PROLOG */
+
+#if __INTRINSIC_PROLOG(__movsw)
+void __movsw(unsigned short *Dest, unsigned short const *Source, size_t Count);
+__INTRINSICS_USEINLINE
+__buildmov(__movsw, unsigned short, "w")
+#define __INTRINSIC_DEFINED___movsw
+#endif /* __INTRINSIC_PROLOG */
+
+#if __INTRINSIC_PROLOG(__movsd)
+void __movsd(unsigned __LONG32 *Dest, unsigned __LONG32 const *Source, size_t Count);
+__INTRINSICS_USEINLINE
+__buildmov(__movsd, unsigned __LONG32, "d")
+#define __INTRINSIC_DEFINED___movsd
+#endif /* __INTRINSIC_PROLOG */
+
+/* Conflicts with ia32intrin.h */
+#ifndef _X86INTRIN_H_INCLUDED
+#if __INTRINSIC_PROLOG(__rdtsc)
+__MINGW_EXTENSION unsigned __int64 __rdtsc(void);
+__MINGW_EXTENSION __INTRINSICS_USEINLINE
+unsigned __int64 __rdtsc(void)
+{
+ return __builtin_ia32_rdtsc();
+}
+#define __INTRINSIC_DEFINED___rdtsc
+#endif /* __INTRINSIC_PROLOG */
+#endif
+
#endif /* defined(__x86_64__) || (defined(_X86_) */
/* ***************************************************** */
Index: mingw-w64-headers/include/winnt.h
===================================================================
--- mingw-w64-headers/include/winnt.h (revision 6023)
+++ mingw-w64-headers/include/winnt.h (working copy)
@@ -1277,7 +1277,20 @@
# if defined(__cplusplus)
extern "C" {
# endif
+
+/* We need implementations for _mm_lfence, _mm_mfence, _mm_sfence, _mm_pause, _mm_prefetch */
# include <x86intrin.h>
+
+/* As of gcc 4.8.1, x86intrin doesn't correctly define _lrotr/_lrotl if longs are 4 bytes */
+#if __SIZEOF_LONG__ == 4
+#undef _lrotl
+#undef _lrotr
+
+#define _lrotl(a,b) __rold((a), (b))
+#define _lrotr(a,b) __rord((a), (b))
+
+#endif
+
# if defined(__cplusplus)
}
# endif
@@ -1312,10 +1325,10 @@
#define ReadTimeStampCounter() __rdtsc()
- VOID __movsb(PBYTE Destination,BYTE const *Source,SIZE_T Count);
- VOID __movsw(PWORD Destination,WORD const *Source,SIZE_T Count);
- VOID __movsd(PDWORD Destination,DWORD const *Source,SIZE_T Count);
- VOID __movsq(PDWORD64 Destination,DWORD64 const *Source,SIZE_T Count);
+ /* VOID __movsb(PBYTE Destination,BYTE const *Source,SIZE_T Count); moved to psdk_inc/intrin-impl.h */
+ /* VOID __movsw(PWORD Destination,WORD const *Source,SIZE_T Count); moved to psdk_inc/intrin-impl.h */
+ /* VOID __movsd(PDWORD Destination,DWORD const *Source,SIZE_T Count); moved to psdk_inc/intrin-impl.h */
+ /* VOID __movsq(PDWORD64 Destination,DWORD64 const *Source,SIZE_T Count); moved to psdk_inc/intrin-impl.h */
#define MultiplyHigh __mulh
#define UnsignedMultiplyHigh __umulh
@@ -1340,17 +1353,39 @@
LONG64 MultiplyExtract128(LONG64 Multiplier,LONG64 Multiplicand,BYTE Shift);
DWORD64 UnsignedMultiplyExtract128(DWORD64 Multiplier,DWORD64 Multiplicand,BYTE Shift);
-#ifndef __CRT__NO_INLINE
- __CRT_INLINE LONG64 MultiplyExtract128(LONG64 Multiplier,LONG64 Multiplicand,BYTE Shift) {
- LONG64 extractedProduct;
- LONG64 highProduct;
- LONG64 lowProduct;
- lowProduct = Multiply128(Multiplier,Multiplicand,&highProduct);
- extractedProduct = (LONG64)ShiftRight128((LONG64)lowProduct,(LONG64)highProduct,Shift);
- return extractedProduct;
+ FORCEINLINE LONG64 MultiplyExtract128(LONG64 Multiplier, LONG64 Multiplicand, BYTE Shift) {
+ LONG64 extractedProduct;
+ LONG64 highProduct;
+ LONG64 lowProduct;
+
+ ULONG64 uhighProduct;
+ ULONG64 ulowProduct;
+
+ lowProduct = _mul128(Multiplier, Multiplicand, &highProduct);
+
+ uhighProduct = (ULONG64)highProduct;
+ ulowProduct = (ULONG64)lowProduct;
+
+ if (highProduct < 0)
+ {
+ uhighProduct = (ULONG64)(-highProduct);
+ if (lowProduct != 0)
+ {
+ ulowProduct = (ULONG64)(-lowProduct);
+ uhighProduct -= 1;
+ }
+ extractedProduct = (LONG64)__shiftright128(ulowProduct, uhighProduct, Shift);
+ extractedProduct = -extractedProduct;
+ }
+ else
+ {
+ extractedProduct = (LONG64)__shiftright128(ulowProduct, uhighProduct, Shift);
+ }
+
+ return extractedProduct;
}
- __CRT_INLINE DWORD64 UnsignedMultiplyExtract128(DWORD64 Multiplier,DWORD64 Multiplicand,BYTE Shift) {
+ FORCEINLINE DWORD64 UnsignedMultiplyExtract128(DWORD64 Multiplier,DWORD64 Multiplicand,BYTE Shift) {
DWORD64 extractedProduct;
DWORD64 highProduct;
DWORD64 lowProduct;
@@ -1358,7 +1393,6 @@
extractedProduct = ShiftRight128(lowProduct,highProduct,Shift);
return extractedProduct;
}
-#endif
/* unsigned char __readgsbyte(unsigned __LONG32 Offset); moved to psdk_inc/intrin-impl.h */
/* unsigned short __readgsword(unsigned __LONG32 Offset); moved to psdk_inc/intrin-impl.h */
@@ -5582,22 +5616,13 @@
#define RtlFillMemory(Destination,Length,Fill) memset((Destination),(Fill),(Length))
#define RtlZeroMemory(Destination,Length) memset((Destination),0,(Length))
- PVOID WINAPI RtlSecureZeroMemory(PVOID ptr,SIZE_T cnt);
-#ifndef __CRT__NO_INLINE
- __CRT_INLINE PVOID WINAPI RtlSecureZeroMemory(PVOID ptr,SIZE_T cnt) {
- volatile char *vptr =(volatile char *)ptr;
-#ifdef __x86_64
- __stosb((PBYTE)((DWORD64)vptr),0,cnt);
-#else
- while(cnt) {
- *vptr = 0;
- vptr++;
- cnt--;
- }
-#endif /* __x86_64 */
+PVOID RtlSecureZeroMemory(PVOID ptr, SIZE_T cnt);
+FORCEINLINE PVOID RtlSecureZeroMemory(PVOID ptr, SIZE_T cnt) {
+ char *vptr = (char *)ptr;
+ /* This will work "securely" as long as __stosb is volatile and uses the memory clobber */
+ __stosb((PBYTE)vptr, 0, cnt);
return ptr;
}
-#endif /* !__CRT__NO_INLINE */
typedef struct _MESSAGE_RESOURCE_ENTRY {
WORD Length;
------------------------------------------------------------------------------
Get 100% visibility into Java/.NET code with AppDynamics Lite!
It's a free troubleshooting tool designed for production.
Get down to code-level detail for bottlenecks, with <2% overhead.
Download for free and get started troubleshooting in minutes.
http://pubads.g.doubleclick.net/gampad/clk?id=48897031&iu=/4140/ostg.clktrk
_______________________________________________
Mingw-w64-public mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public