This patch addresses the following issues:
1) The existing code for __faststorefence doesn't actually generate a
fence. It just generates a barrier. This patch maps __faststorefence
to _mm_sfence (instead of doing MS's trick with "lock or"). sfence
appears to be faster than MS's "fast" approach on modern processors.
2) MS's MemoryBarrier (which is supposed to be a full compiler barrier +
processor fence) maps to __faststorefence. This works for MS because
their __faststorefence trick ends up generating a full fence + full
barrier. Since our __faststorefence now uses sfence, this patch adjusts
MemoryBarrier to use _mm_mfence().
3) While there is a prototype for _ReadWriteBarrier in winnt.h, there is
no implementation. Since _ReadWriteBarrier is -only- a compiler
directive (rather like #pragma), there is no way to place it in a
library. As a result, this patch implements it with a #define in both
winnt.h & intrin.h.
4) Gcc doesn't actually support _ReadBarrier() and _WriteBarrier. This
patch defines them as being mapped to _ReadWriteBarrier() with a #define
in both winnt.h & intrin.h.
5) While there is a prototype for __int2c in winnt.h and intrin.h, there
is no implementation. Since MS docs say this is only available as an
intrinsic (what gcc calls builtin), this patch defines it with a macro
in both winnt.h and intrin.h.
6) The code for DbgRaiseAssertionFailure won't compile with
-masm=intel. Use __builtint from intrin-mac.h.
7) Add __buildint to intrin-mac.h for DbgRaiseAssertionFailure & __int2c.
8) On x86, if SSE2 is available, use _mm_pause for YieldProcessor and
_mm_mfence for MemoryBarrier. If SSE2 is not available, build the
appropriate asm ("rep nop" for pause and "xchg" for MemoryBarrier).
dw
Index: mingw-w64-headers/crt/intrin.h
===================================================================
--- mingw-w64-headers/crt/intrin.h (revision 5883)
+++ mingw-w64-headers/crt/intrin.h (working copy)
@@ -12,7 +12,18 @@
#include <setjmp.h>
#endif
#include <stddef.h>
+#include <psdk_inc/intrin-mac.h>
+#define _ReadWriteBarrier() __asm__ __volatile__ ("" ::: "memory")
+#define _ReadBarrier _ReadWriteBarrier
+#define _WriteBarrier _ReadWriteBarrier
+
+#ifdef __x86_64__
+#define __faststorefence _mm_sfence
+#endif
+
+#define __int2c() __buildint(0x2c)
+
#if defined(__GNUC__) && \
(defined(__i386__) || defined(__x86_64__))
extern unsigned int __builtin_ia32_crc32qi (unsigned int, unsigned char);
@@ -436,7 +447,6 @@
__MACHINEIA64(__MINGW_EXTENSION void __ptri(__int64,__int64))
__MACHINEIA64(void *_rdteb(void))
__MACHINESA(int _ReadStatusReg(int))
- __MACHINECE(void _ReadWriteBarrier(void))
__MACHINEIA64(__MINGW_EXTENSION void _ReleaseSpinLock(unsigned __int64 *))
__MACHINEI(void *_ReturnAddress(void))
__MACHINEIA64(void *_ReturnAddress(void))
@@ -496,7 +506,6 @@
__MACHINECE(int __cdecl wcsncmp(const wchar_t *,const wchar_t *,size_t))
__MACHINECE(wchar_t *__cdecl wcsncpy(wchar_t * __restrict__ ,const wchar_t
* __restrict__ ,size_t))
__MACHINECE(wchar_t *__cdecl _wcsset(wchar_t *,wchar_t))
- __MACHINECE(void _WriteBarrier(void))
__MACHINESA(void _WriteStatusReg(int,int,int))
__MACHINEI(void *_AddressOfReturnAddress(void))
__MACHINEIA64(void __yield(void))
@@ -961,11 +970,6 @@
__MACHINEX86X(__m128 _mm_moveldup_ps(__m128))
__MACHINEX86X(void _mm_mwait(unsigned int,unsigned int))
#endif
- __MACHINEI(void _WriteBarrier(void))
- __MACHINEI(void _ReadWriteBarrier(void))
- __MACHINEIA64(void _WriteBarrier(void))
- __MACHINEIA64(void _ReadWriteBarrier(void))
- __MACHINEX64(void __faststorefence(void))
__MACHINEX64(__MINGW_EXTENSION __int64 __mulh(__int64,__int64))
__MACHINEX64(__MINGW_EXTENSION unsigned __int64 __umulh(unsigned
__int64,unsigned __int64))
__MACHINEX64(__MINGW_EXTENSION unsigned __int64 __readcr0(void))
@@ -1077,8 +1081,6 @@
__MACHINEW64(__MINGW_EXTENSION unsigned __int64 __shiftright128(unsigned
__int64 LowPart,unsigned __int64 HighPart,unsigned char Shift))
__MACHINEW64(__MINGW_EXTENSION unsigned __int64 _umul128(unsigned __int64
multiplier,unsigned __int64 multiplicand,unsigned __int64 *highproduct))
__MACHINEW64(__MINGW_EXTENSION __int64 _mul128(__int64 multiplier,__int64
multiplicand,__int64 *highproduct))
- __MACHINEI(void __int2c(void))
- __MACHINEIW64(void _ReadBarrier(void))
__MACHINEIW64(unsigned char _rotr8(unsigned char value,unsigned char
shift))
__MACHINEIW64(unsigned short _rotr16(unsigned short value,unsigned char
shift))
__MACHINEIW64(unsigned char _rotl8(unsigned char value,unsigned char
shift))
Index: mingw-w64-headers/include/psdk_inc/intrin-mac.h
===================================================================
--- mingw-w64-headers/include/psdk_inc/intrin-mac.h (revision 5889)
+++ mingw-w64-headers/include/psdk_inc/intrin-mac.h (working copy)
@@ -54,4 +54,22 @@
return old; \
}
+/* This macro is used by YieldProcessor when compiling x86 w/o SSE2.
+It generates the same opcodes as _mm_pause. */
+#define __buildpause() __asm__ __volatile__("rep nop")
+
+/* This macro is used by DbgRaiseAssertionFailure and __int2c
+
+Parameters: (IntNum)
+IntNum: Interrupt number in hex */
+#define __buildint(a) __asm__ __volatile__("int {$}" #a :)
+
+/* This macro is used by MemoryBarrier when compiling x86 w/o SSE2.
+Note that on i386, xchg performs an implicit lock. */
+#define __buildmemorybarrier() \
+{ \
+unsigned char Barrier; \
+__asm__ __volatile__("xchg{b %%| }al, %0" :"=m" (Barrier) : /* no inputs */ :
"eax", "memory"); \
+}
+
#endif /* _INTRIN_MAC_ */
Index: mingw-w64-headers/include/winnt.h
===================================================================
--- mingw-w64-headers/include/winnt.h (revision 5889)
+++ mingw-w64-headers/include/winnt.h (working copy)
@@ -1479,7 +1479,9 @@
#define CacheLineFlush(Address) _mm_clflush(Address)
- VOID _ReadWriteBarrier(VOID);
+#define _ReadWriteBarrier() __asm__ __volatile__ ("" ::: "memory")
+#define _ReadBarrier _ReadWriteBarrier
+#define _WriteBarrier _ReadWriteBarrier
/* Don't include intrin.h on Cygwin. It pulls in unneeded stuff. */
#ifdef __CYGWIN__
@@ -1499,14 +1501,9 @@
#define MemoryFence _mm_mfence
#define StoreFence _mm_sfence
-#ifdef __MINGW_INTRIN_INLINE
- __MINGW_INTRIN_INLINE void __faststorefence(void) {
- __asm__ __volatile__ ("" ::: "memory");
- }
-#endif
-
+#define __faststorefence _mm_sfence
#define YieldProcessor _mm_pause
-#define MemoryBarrier __faststorefence
+#define MemoryBarrier _mm_mfence
#define PreFetchCacheLine(l,a) _mm_prefetch((CHAR CONST *) a,l)
#define PrefetchForWrite(p) _m_prefetchw(p)
#define ReadForWriteAccess(p) (_m_prefetchw(p),*(p))
@@ -1519,8 +1516,7 @@
#define ReadMxCsr _mm_getcsr
#define WriteMxCsr _mm_setcsr
- VOID __int2c(VOID);
-
+#define __int2c() __buildint(0x2c)
#define DbgRaiseAssertionFailure() __int2c()
#define GetCallersEflags() __getcallerseflags()
@@ -1836,7 +1832,13 @@
#if defined(__i386__) && !defined(__x86_64)
-#define YieldProcessor() __asm__ __volatile__("rep; nop");
+#ifdef __SSE2__
+#define YieldProcessor _mm_pause
+#define MemoryBarrier _mm_mfence
+#else
+#define YieldProcessor __buildpause
+#define MemoryBarrier() __buildmemorybarrier
+#endif
#define PreFetchCacheLine(l,a)
#define ReadForWriteAccess(p) (*(p))
@@ -1848,24 +1850,16 @@
struct _TEB *NtCurrentTeb(void);
PVOID GetCurrentFiber(void);
PVOID GetFiberData(void);
- VOID MemoryBarrier(VOID);
#ifdef __CRT__NO_INLINE
-# define DbgRaiseAssertionFailure() __asm__ __volatile__("int $0x2c");
+#define DbgRaiseAssertionFailure() __buildint(0x2c)
#else
VOID DbgRaiseAssertionFailure(void);
__CRT_INLINE VOID DbgRaiseAssertionFailure(void) {
- __asm__ __volatile__("int $0x2c");
+ __buildint(0x2c);
}
#endif
- __CRT_INLINE VOID MemoryBarrier(VOID)
- {
- LONG Barrier = 0;
- __asm__ __volatile__("xchgl %%eax,%0 "
- :"=r" (Barrier));
- }
-
__CRT_INLINE struct _TEB *NtCurrentTeb(void)
{
struct _TEB *ret;
------------------------------------------------------------------------------
This SF.net email is sponsored by Windows:
Build for Windows Store.
http://p.sf.net/sfu/windows-dev2dev
_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public