One concern I have is performance. Does replacing the intrinsics with
out of line function call whose semantics the compiler has no idea
about have any impact on the runtime efficiency of the generated code?
I would be especially interested in "real life" scenarios such as the
usage of the atomic operations in basic_string.

It would be good to see some before and after numbers. If you don't
have all the platforms to run the test post your benchmark and Travis
can help you put them together.

FYI, in case you're not aware of this (it's not immediately obvious),
even though we define the full set of atomic operations (i.e., for all
integer types) the library only uses the overloads for int and long.

Martin

Farid Zaripov wrote:
Attached is a patch, adding __rw_atomic_{add|xchg}xx() functions on Win32/Win64 platforms.

 ChangeLog:
 * msvc-7.0.config: Added AS config variable.
 * msvc-8.0-x64.config: Ditto.
 * filterdef.js: Added definition of the CustomFileDef class
* projectdef.js (InitAsmTool): New function to init custom build rule for .asm files.
 * projects.js: Added definitions of the platform dependent files.
 * utilities.js: Read AS configuration variable from the .config file.
* i86/atomic.asm: New file with definitions of the __rw_atomic_xxx() for Win32 platform. * i86_64/atomic.asm: New file with definitions of the __rw_atomic_xxx() for Windows/x64 platform.
 * _mutex.h: Removed all dependencies on InterlockedXXX() API functions.
 Use new __rw_atomic_xxx() functions instead of InterlockedXXX().
 * once.cpp [_WIN32 && _DLL]: Tell linker to export __atomic_xxx()
 functions, defined in .asm files.

Farid.


------------------------------------------------------------------------

Index: etc/config/windows/filterdef.js
===================================================================
--- etc/config/windows/filterdef.js     (revision 570339)
+++ etc/config/windows/filterdef.js     (working copy)
@@ -25,7 +25,7 @@
var sourceFilterName = "Source Files";
 var sourceFilterUuid = "{4FC737F1-C7A5-4376-A066-2A32D752A2FF}";
-var sourceFilterExts = ".cpp;.cxx;.s";
+var sourceFilterExts = ".cpp;.cxx;.s;.asm";
var headerFilterName = "Header Files";
 var headerFilterUuid = "{93995380-89BD-4b04-88EB-625FBE52EBFB}";
@@ -56,6 +56,21 @@
     return str;
 }
+//------------------------------------------------
+// CustomFileDef class
+//------------------------------------------------
+
+// CustomFileDef .ctor
+function CustomFileDef(filepath, platform, initfun)
+{
+    this.filepath = filepath;
+    this.platform = platform;
+    this.initfun  = initfun;
+}
+
+// global array with platform dependent files definitions
+var customFileDefs = new Array();
+
 // common macros
 var cmnMacros = new Array();
@@ -126,7 +141,29 @@
     var VCFile = filter.AddFile(filename);
     if (null != filetype && typeof(VCFile.FileType) != "undefined")
         VCFile.FileType = filetype;
- +
+    var customFileDef = null;
+
+    if (!exclude)
+    {
+        // find the platform dependent file definition
+        for (var i = 0; i < customFileDefs.length; ++i)
+        {
+            var custFileDef = customFileDefs[i];
+            var pos = VCFile.FullPath.length - custFileDef.filepath.length;
+            if (0 <= pos && pos == 
VCFile.FullPath.indexOf(custFileDef.filepath))
+            {
+                customFileDef = custFileDef;
+                break;
+            }
+        }
+
+        // exclude this file from build if current platform
+        // is not custom file target platform
+        if (null != customFileDef && customFileDef.platform != PLATFORM)
+            exclude = true;
+    }
+ if (exclude)
     {
         var cfgs = VCFile.FileConfigurations;
@@ -144,6 +181,12 @@
             cfg.ExcludedFromBuild = exclude;
         }
     }
+    else if (null != customFileDef &&
+             "undefined" != typeof(customFileDef.initfun))
+    {
+        // init
+        customFileDef.initfun(VCFile);
+    }
 }
// create VCFilter object from the FilterDef definition
Index: etc/config/windows/msvc-7.0.config
===================================================================
--- etc/config/windows/msvc-7.0.config  (revision 570339)
+++ etc/config/windows/msvc-7.0.config  (working copy)
@@ -38,6 +38,7 @@
 CXX=cl
 LD=cl
 AR=lib
+AS=ml
// Use singlethreaded or mutlithreaded CRT in 11s, 11d solution configurations
 // 0 for MS VisualStudio .NET and MS VisualStudio .NET 2003
Index: etc/config/windows/msvc-8.0-x64.config
===================================================================
--- etc/config/windows/msvc-8.0-x64.config      (revision 570339)
+++ etc/config/windows/msvc-8.0-x64.config      (working copy)
@@ -1,2 +1,3 @@
 #include msvc-8.0
 PLATFORM=x64
+AS=ml64
Index: etc/config/windows/projectdef.js
===================================================================
--- etc/config/windows/projectdef.js    (revision 570339)
+++ etc/config/windows/projectdef.js    (working copy)
@@ -941,3 +941,25 @@
return projectDef;
 }
+
+// init custom build rule for .asm files
+function InitAsmTool(VCFile)
+{
+    var cfgs = VCFile.FileConfigurations;
+    for (var i = 1; i <= cfgs.Count; ++i)
+    {
+        var cfg = cfgs.Item(i);
+        if ((typeof(cfg.Tool.ToolKind) != "undefined" &&
+            cfg.Tool.ToolKind != "VCCustomBuildTool") ||
+            cfg.Tool.ToolName != "Custom Build Tool")
+        {
+            cfg.Tool = 
cfg.ProjectConfiguration.FileTools.Item("VCCustomBuildTool");
+        }
+
+        var tool = cfg.Tool;
+        tool.Description = "Compiling .asm file...";
+        tool.Outputs = "$(IntDir)\\$(InputName).obj";
+        tool.CommandLine = AS + " /c /nologo /Fo" + tool.Outputs +
+                           " /W3 /Zi /Ta" + VCFile.RelativePath;
+    }
+}
Index: etc/config/windows/projects.js
===================================================================
--- etc/config/windows/projects.js      (revision 570339)
+++ etc/config/windows/projects.js      (working copy)
@@ -84,6 +84,10 @@
     projectDefs.push(new Array(configureDef));
///////////////////////////////////////////////////////////////////////////////
+    // add platform dependent files
+    customFileDefs.push(new CustomFileDef("i86\\atomic.asm", "Win32", 
InitAsmTool));
+    customFileDefs.push(new CustomFileDef("i86_64\\atomic.asm", "x64", 
InitAsmTool));
+
     var stdcxxDef = new ProjectDef(".stdcxx", typeLibrary);
     stdcxxDef.VCProjDir = ProjectsDir;
     stdcxxDef.FilterDefs.push(
Index: etc/config/windows/utilities.js
===================================================================
--- etc/config/windows/utilities.js     (revision 570339)
+++ etc/config/windows/utilities.js     (working copy)
@@ -32,6 +32,7 @@
 var CXX = "cl";
 var LD = "cl";
 var AR = "lib";
+var AS = "ml";
 var SLNVER="8.00";
 var SLNCOMMENT="";
 var UNICODELOG = false;
@@ -112,6 +113,9 @@
         case "AR":
             AR = arr[2];
             break;
+        case "AS":
+            AS = arr[2];
+            break;
         case "SLNVER":
             SLNVER = arr[2];
             break;
@@ -179,6 +183,7 @@
     stream.WriteLine("  CXX=" + CXX);
     stream.WriteLine("  LD=" + LD);
     stream.WriteLine("  AR=" + AR);
+    stream.WriteLine("  AS=" + AS);
     stream.WriteLine("  SLNVER=" + SLNVER);
     stream.WriteLine("  SLNCOMMENT=" + SLNCOMMENT);
     stream.WriteLine("  UNICODELOG=" + UNICODELOG);
Index: include/rw/_mutex.h
===================================================================
--- include/rw/_mutex.h (revision 570431)
+++ include/rw/_mutex.h (working copy)
@@ -140,15 +140,7 @@
 __declspec (dllimport) void __stdcall
 DeleteCriticalSection (_RTL_CRITICAL_SECTION*);
-__declspec (dllimport) long __stdcall
-InterlockedIncrement (_RWSTD_INTERLOCKED_T*);
-
-__declspec (dllimport) long __stdcall
-InterlockedDecrement (_RWSTD_INTERLOCKED_T*);
-
-__declspec (dllimport) long __stdcall
-InterlockedExchange (_RWSTD_INTERLOCKED_T*, long);
-
+_RWSTD_EXPORT int __rw_atomic_add32 (int*, int);
 }   // extern "C"
_RWSTD_NAMESPACE (__rw) { @@ -478,11 +470,11 @@
     // implicit initialization used to prevent a g++ 2.95.2 warning on Tru64
     // sorry: semantics of inline function static data are wrong (you'll wind
     // up with multiple copies)
-    static volatile long __cntr /* = 0 */;   // initialization counter
+    static volatile int __cntr /* = 0 */;   // initialization counter
#if defined (_WIN32) || defined (_WIN64)
     // MT safe
-    if (0 == __cntr && 1 == InterlockedIncrement ((long*)&__cntr))
+    if (0 == __cntr && 1 == __rw_atomic_add32 ((int*)&__cntr, +1))
 #else
     // not so safe (volatile should help)
     if (0 == __cntr && 1 == ++__cntr)
@@ -1161,19 +1153,20 @@
                                         false);
} -/********************** i386/gcc **************************************/
+/********************** i386/gcc || _M_IX86 *********************************/
-#elif defined (__i386__) && (defined (__GNUG__) || defined (__INTEL_COMPILER))
+#elif defined (__i386__) && (defined (__GNUG__) \
+   || defined (__INTEL_COMPILER)) || defined (_M_IX86)
extern "C" { -char __rw_atomic_add8 (char*, int);
-short __rw_atomic_add16 (short*, short);
-int __rw_atomic_add32 (int*, int);
+_RWSTD_EXPORT char __rw_atomic_add8 (char*, int);
+_RWSTD_EXPORT short __rw_atomic_add16 (short*, short);
+_RWSTD_EXPORT int __rw_atomic_add32 (int*, int);
-char __rw_atomic_xchg8 (char*, char);
-short __rw_atomic_xchg16 (short*, short);
-int __rw_atomic_xchg32 (int*, int);
+_RWSTD_EXPORT char __rw_atomic_xchg8 (char*, char);
+_RWSTD_EXPORT short __rw_atomic_xchg16 (short*, short);
+_RWSTD_EXPORT int __rw_atomic_xchg32 (int*, int);
} // extern "C" @@ -1349,85 +1342,39 @@
                                _RWSTD_STATIC_CAST (int, __y));
 }
+/********************** IA64/x86_64/_M_X64 *****************************/ -/********************** WIN 32/64 ************************************/
+#elif defined (__ia64) || defined (__x86_64) || defined (_M_X64)
-#elif defined (_WIN32)
+extern "C" {
-// Interlocked[In|De]crement functions atomically modify their argument
-// and return the new value
+_RWSTD_EXPORT _RWSTD_INT8_T
+__rw_atomic_xchg8  (_RWSTD_INT8_T*,  _RWSTD_INT8_T);
-// InterlockedExchange atomically sets the value pointed to by the first
-// argument to that of the second argument and returns the original value
+_RWSTD_EXPORT _RWSTD_INT16_T
+__rw_atomic_xchg16 (_RWSTD_INT16_T*, _RWSTD_INT16_T);
-inline int
-__rw_atomic_preincrement (int &__x, bool)
-{
-    _RWSTD_COMPILE_ASSERT (sizeof __x == sizeof (long));
-    return InterlockedIncrement (_RWSTD_REINTERPRET_CAST (long*, &__x));
-}
+_RWSTD_EXPORT _RWSTD_INT32_T
+__rw_atomic_xchg32 (_RWSTD_INT32_T*, _RWSTD_INT32_T);
-inline unsigned int
-__rw_atomic_preincrement (unsigned int &__x, bool)
-{
-    return __rw_atomic_preincrement (_RWSTD_REINTERPRET_CAST (int&, __x),
-                                     false);
-}
+_RWSTD_EXPORT _RWSTD_INT8_T
+__rw_atomic_add8  (_RWSTD_INT8_T*,  _RWSTD_INT8_T);
+_RWSTD_EXPORT _RWSTD_INT16_T
+__rw_atomic_add16 (_RWSTD_INT16_T*, _RWSTD_INT16_T);
-inline int
-__rw_atomic_predecrement (int &__x, bool)
-{
-    _RWSTD_COMPILE_ASSERT (sizeof __x == sizeof (long));
-    return InterlockedDecrement (_RWSTD_REINTERPRET_CAST (long*, &__x));
-}
+_RWSTD_EXPORT _RWSTD_INT32_T
+__rw_atomic_add32 (_RWSTD_INT32_T*, _RWSTD_INT32_T);
-
-inline unsigned int
-__rw_atomic_predecrement (unsigned int &__x, bool)
-{
-    return __rw_atomic_predecrement (_RWSTD_REINTERPRET_CAST (int&, __x),
-                                     false);
-}
-
-
-inline int
-__rw_atomic_exchange (int &__x, int __y, bool)
-{
-    _RWSTD_COMPILE_ASSERT (sizeof __x == sizeof (long));
-    return InterlockedExchange (_RWSTD_REINTERPRET_CAST (long*, &__x),
-                                _RWSTD_STATIC_CAST (long, __y));
-}
-
-
-inline unsigned int
-__rw_atomic_exchange (unsigned int &__x, unsigned int __y, bool)
-{
-    return __rw_atomic_exchange (_RWSTD_REINTERPRET_CAST (int&, __x),
-                                 _RWSTD_STATIC_CAST (int, __y), false);
-}
-
-/********************** IA64/x86_64 ***********************************/
-
-#elif defined (__ia64) || defined (__x86_64)
-
-extern "C" {
-
-_RWSTD_INT8_T  __rw_atomic_xchg8  (_RWSTD_INT8_T*,  _RWSTD_INT8_T);
-_RWSTD_INT16_T __rw_atomic_xchg16 (_RWSTD_INT16_T*, _RWSTD_INT16_T);
-_RWSTD_INT32_T __rw_atomic_xchg32 (_RWSTD_INT32_T*, _RWSTD_INT32_T);
-
-
-_RWSTD_INT8_T  __rw_atomic_add8  (_RWSTD_INT8_T*,  _RWSTD_INT8_T);
-_RWSTD_INT16_T __rw_atomic_add16 (_RWSTD_INT16_T*, _RWSTD_INT16_T);
-_RWSTD_INT32_T __rw_atomic_add32 (_RWSTD_INT32_T*, _RWSTD_INT32_T);
-
 #ifdef _RWSTD_INT64_T
-_RWSTD_INT64_T __rw_atomic_xchg64 (_RWSTD_INT64_T*, _RWSTD_INT64_T);
-_RWSTD_INT64_T __rw_atomic_add64 (_RWSTD_INT64_T*, _RWSTD_INT64_T);
+_RWSTD_EXPORT _RWSTD_INT64_T
+__rw_atomic_xchg64 (_RWSTD_INT64_T*, _RWSTD_INT64_T);
+_RWSTD_EXPORT _RWSTD_INT64_T
+__rw_atomic_add64 (_RWSTD_INT64_T*, _RWSTD_INT64_T);
+
 #endif   // _RWSTD_INT64_T
} // extern "C"
Index: src/i86/atomic.asm
===================================================================
--- src/i86/atomic.asm  (revision 0)
+++ src/i86/atomic.asm  (revision 0)
@@ -0,0 +1,178 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; i86/atomic.asm
+;
+; $Id$
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Licensed to the Apache Software  Foundation (ASF) under one or more
+; contributor  license agreements.  See  the NOTICE  file distributed
+; with  this  work  for  additional information  regarding  copyright
+; ownership.   The ASF  licenses this  file to  you under  the Apache
+; License, Version  2.0 (the  "License"); you may  not use  this file
+; except in  compliance with the License.   You may obtain  a copy of
+; the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the  License is distributed on an  "AS IS" BASIS,
+; WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
+; implied.   See  the License  for  the  specific language  governing
+; permissions and limitations under the License.
+;
+; Copyright 2003-2006 Rogue Wave Software.
+; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+                .486
+                .model flat
+                .code
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" char __rw_atomic_xchg8 (char *x, char y);
+;
+; Atomically assigns the 8-bit value y to *x and returns
+; the original (before assignment) 8-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 4
+                public ___rw_atomic_xchg8
+___rw_atomic_xchg8 proc                     ; char (char *x, char y)
+
+arg_x           = dword ptr  4
+arg_y           = byte ptr  8
+ + mov ecx, [esp+arg_x] ; %ecx = x
+                mov     al,  [esp+arg_y]    ; %al = y
+                xchg    al,  [ecx]          ; %al <-> (%ecx)
+ ret +___rw_atomic_xchg8 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" short __rw_atomic_xchg16 (short *x, short y);
+;
+; Atomically assigns the 16-bit value y to *x and returns
+; the original (before assignment) 16-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 4
+                public ___rw_atomic_xchg16
+___rw_atomic_xchg16 proc                    ; short (short *x, short y)
+
+arg_x           = dword ptr  4
+arg_y           = word ptr  8
+ + mov ecx, [esp+arg_x] ; %ecx = x + mov ax, [esp+arg_y] ; %eax = y + xchg ax, [ecx] ; %ax <-> (%ecx) + ret
+___rw_atomic_xchg16 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int __rw_atomic_xchg32 (int *x, int y);
+;
+; Atomically assigns the 32-bit value y to *x and returns
+; the original (before assignment) 32-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 4
+                public ___rw_atomic_xchg32
+___rw_atomic_xchg32 proc                    ; int (int *x, int y)
+
+arg_x           = dword ptr  4
+arg_y           = dword ptr  8
+ + mov ecx, [esp+arg_x] ; %ecx = x
+                mov     eax, [esp+arg_y]    ; %eax = y
+                xchg    eax, [ecx]          ; %eax <-> (%ecx)
+                ret
+___rw_atomic_xchg32 endp
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" char __rw_atomic_add8 (char *x, int y);
+;
+; Atomically increments the 8-bit value *x by y and returns
+; the new (after increment) 8-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 4
+                public ___rw_atomic_add8
+___rw_atomic_add8 proc                       ; char (char *dst, int inc)
+
+arg_dst         = dword ptr  4
+arg_inc         = dword ptr  8
+ + mov ecx, [esp+arg_dst] ; %ecx = dst + mov eax, [esp+arg_inc] ; %eax = inc + mov edx, eax +
+                lock xadd [ecx], al          ; tmp = *dst;
+                                             ; dst += inc;
+ ; %al = tmp + + add eax, edx ; return %eax + inc + ret
+___rw_atomic_add8 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" short __rw_atomic_add16 (short *x, short y);
+;
+; Atomically increments the 16-bit value *x by y and returns
+; the new (after increment) 16-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 4
+                public ___rw_atomic_add16
+___rw_atomic_add16 proc                      ; short (short *dst, short inc)
+
+arg_dst         = dword ptr  4
+arg_inc         = dword ptr  8
+
+ mov ecx, [esp+arg_dst] ; %ecx = dst + mov eax, [esp+arg_inc] ; %eax = inc + mov edx, eax + + lock xadd [ecx], ax ; tmp = *dst;
+                                             ; dst += inc;
+ ; %ax = tmp + + add eax, edx ; return %eax + inc + ret
+___rw_atomic_add16 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int __rw_atomic_add32 (int *x, int y);
+;
+; Atomically increments the 32-bit value *x by y and returns
+; the new (after increment) 32-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 4
+                public ___rw_atomic_add32
+___rw_atomic_add32 proc                      ; int (int *dst, int inc)
+
+arg_dst         = dword ptr  4
+arg_inc         = dword ptr  8
+
+ mov ecx, [esp+arg_dst] ; %ecx = dst + mov edx, [esp+arg_inc] ; %edx = inc + mov eax, edx + + lock xadd [ecx], eax ; tmp = *dst;
+                                             ; dst += inc;
+ ; %eax = tmp + + add eax, edx ; return %eax + inc + ret
+___rw_atomic_add32 endp
+
+                end

Property changes on: src\i86\atomic.asm
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Index: src/i86_64/atomic.asm
===================================================================
--- src/i86_64/atomic.asm       (revision 0)
+++ src/i86_64/atomic.asm       (revision 0)
@@ -0,0 +1,186 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; i86_64/atomic.asm
+;
+; $Id$
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Licensed to the Apache Software  Foundation (ASF) under one or more
+; contributor  license agreements.  See  the NOTICE  file distributed
+; with  this  work  for  additional information  regarding  copyright
+; ownership.   The ASF  licenses this  file to  you under  the Apache
+; License, Version  2.0 (the  "License"); you may  not use  this file
+; except in  compliance with the License.   You may obtain  a copy of
+; the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the  License is distributed on an  "AS IS" BASIS,
+; WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
+; implied.   See  the License  for  the  specific language  governing
+; permissions and limitations under the License.
+;
+; Copyright 2003-2006 Rogue Wave Software.
+; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+                .code
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int8_t __rw_atomic_xchg8 (int8_t *x, int8_t y);
+;
+; Atomically assigns the 8-bit value y to *x and returns
+; the original (before assignment) 8-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 16
+                public __rw_atomic_xchg8
+__rw_atomic_xchg8 proc                    ; int8_t (int8_t *x, int8_t y)
+                                          ; %rcx = x
+ mov al, dl ; %al = y + xchg al, [rcx] ; %al <-> (%rcx)
+                ret
+__rw_atomic_xchg8 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int16_t __rw_atomic_xchg16 (int16_t *x, int16_t y);
+;
+; Atomically assigns the 16-bit value y to *x and returns
+; the original (before assignment) 16-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 16
+                public __rw_atomic_xchg16
+__rw_atomic_xchg16 proc                   ; int16_t (int16_t *x, int16_t y)
+                                          ; %rcx = x
+                mov     ax, dx            ; %ax = y
+                xchg    ax, [rcx]         ; %ax <-> (%rcx)
+                ret
+__rw_atomic_xchg16 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int32_t __rw_atomic_xchg32 (int32_t *x, int32_t y);
+;
+; Atomically assigns the 32-bit value y to *x and returns
+; the original (before assignment) 32-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 16
+                public __rw_atomic_xchg32
+__rw_atomic_xchg32 proc                   ; int32_t (int32_t *x, int32_t y)
+                                          ; %rcx = x
+                mov     eax, edx          ; %eax = y
+                xchg    eax, [rcx]        ; %eax <-> (%rcx)
+                ret
+__rw_atomic_xchg32 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int32_t __rw_atomic_xchg64 (int64_t *x, int64_t y);
+;
+; Atomically assigns the 64-bit value y to *x and returns
+; the original (before assignment) 64-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 16
+                public __rw_atomic_xchg64
+__rw_atomic_xchg64 proc                   ; int64_t (int64_t *x, int64_t y)
+                                          ; %rcx = x
+                mov     rax, rdx          ; %rax = y
+                xchg    rax, [rcx]        ; %rax <-> (%rcx)
+                ret
+__rw_atomic_xchg64 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int8_t __rw_atomic_add8 (int8_t *x, int8_t y);
+;
+; Atomically increments the 8-bit value *x by y and returns
+; the new (after increment) 8-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 16
+                public __rw_atomic_add8
+__rw_atomic_add8 proc                     ; int8_t (int8_t *dst, int8_t inc)
+                                          ; %rcx = dst
+                mov     eax, edx          ; %eax = inc
+
+                lock xadd [rcx], al       ; tmp = *dst
+ ; dst += inc + ; %al = tmp + add eax, edx ; return %al + inc
+                ret
+__rw_atomic_add8 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int16_t __rw_atomic_add16 (int16_t *x, int16_t y);
+;
+; Atomically increments the 16-bit value *x by y and returns
+; the new (after increment) 16-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 16
+                public __rw_atomic_add16
+__rw_atomic_add16 proc                    ; int16_t (int16_t *dst, int16_t inc)
+                                          ; %rcx = dst
+ mov ax, dx ; %ax = inc +
+                lock xadd [rcx], ax       ; tmp = *dst
+                                          ; dst += inc
+ ; eax = tmp +
+                add     ax,  dx           ; return %ax + inc
+                ret
+__rw_atomic_add16 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int32_t __rw_atomic_add32 (int32_t *x, int32_t y);
+;
+; Atomically increments the 32-bit value *x by y and returns
+; the new (after increment) 32-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 16
+                public __rw_atomic_add32
+__rw_atomic_add32 proc                    ; int32_t (int32_t *dst, int32_t inc)
+                                          ; %rcx = dst
+                mov     eax, edx          ; %eax = inc
+
+                lock xadd [rcx], eax      ; tmp = *dst
+                                          ; dst += inc
+                                          ; %eax = tmp
+
+                add     eax, edx          ; return %eax + inc
+                ret
+__rw_atomic_add32 endp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern "C" int64_t __rw_atomic_add64 (int64_t *x, int64_t y);
+;
+; Atomically increments the 32-bit value *x by y and returns
+; the new (after increment) 32-bit value of *x.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+                align 16
+                public __rw_atomic_add64
+__rw_atomic_add64 proc                    ; int64_t (int64_t *dst, int64_t inc)
+                                          ; %rcx = dst
+                mov     rax, rdx          ; %eax = inc
+
+                lock xadd [rcx], rax      ; tmp = *dst
+                                          ; dst += inc
+                                          ; %eax = tmp
+
+                add     rax, rdx          ; return %eax + inc
+                ret
+__rw_atomic_add64 endp
+
+                end

Property changes on: src\i86_64\atomic.asm
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Index: src/once.cpp
===================================================================
--- src/once.cpp        (revision 570339)
+++ src/once.cpp        (working copy)
@@ -188,3 +188,32 @@
 }   // extern "C"
} // namespace __rw
+
+// export __rw_atomic_xxx() functions, defined in atomic.asm
+#if defined (_WIN32) && defined (_DLL)
+
+#  if defined (_M_IX86)
+
+#    pragma comment(linker, "/EXPORT:___rw_atomic_add8")
+#    pragma comment(linker, "/EXPORT:___rw_atomic_add16")
+#    pragma comment(linker, "/EXPORT:___rw_atomic_add32")
+#    pragma comment(linker, "/EXPORT:___rw_atomic_xchg8")
+#    pragma comment(linker, "/EXPORT:___rw_atomic_xchg16")
+#    pragma comment(linker, "/EXPORT:___rw_atomic_xchg32")
+
+#  elif defined (_M_X64)
+
+#    pragma comment(linker, "/EXPORT:__rw_atomic_add8")
+#    pragma comment(linker, "/EXPORT:__rw_atomic_add16")
+#    pragma comment(linker, "/EXPORT:__rw_atomic_add32")
+#    pragma comment(linker, "/EXPORT:__rw_atomic_xchg8")
+#    pragma comment(linker, "/EXPORT:__rw_atomic_xchg16")
+#    pragma comment(linker, "/EXPORT:__rw_atomic_xchg32")
+
+#    ifdef _RWSTD_INT64_T
+#      pragma comment(linker, "/EXPORT:__rw_atomic_add64")
+#      pragma comment(linker, "/EXPORT:__rw_atomic_xchg64")
+#    endif   // _RWSTD_INT64_T
+#  endif   // _M_IX86
+
+#endif   // _WIN32 && _DLL

Reply via email to