Author: Armin Rigo <[email protected]>
Branch: c7-refactor
Changeset: r736:3906e3d066b4
Date: 2014-02-14 18:30 +0100
http://bitbucket.org/pypy/stmgc/changeset/3906e3d066b4/
Log: In-progress: redoing the page privatization
diff --git a/c7/stm/atomic.h b/c7/stm/atomic.h
--- a/c7/stm/atomic.h
+++ b/c7/stm/atomic.h
@@ -1,12 +1,34 @@
-#if defined(__i386__) || defined(__x86_64__)
+/* spin_loop() corresponds to the PAUSE instruction on x86.  On
+   other architectures we emit no instruction (but still need the
+   compiler barrier); if you know the corresponding instruction for
+   another architecture, feel free to add it here.
+*/
+
+/* write_fence() inserts a "write fence": it makes sure that past
+   writes are really pushed to memory before any future writes.  We
+   assume that the matching "read fence" effect is provided
+   automatically by a corresponding
+   __sync_bool_compare_and_swap().
+
+   On x86 the CPU does this automatically; we only need a compiler
+   barrier (asm("" : : : "memory")).
+
+   On other architectures, we use __sync_synchronize() as a general
+   fall-back, but some platforms may have a more efficient
+   alternative.
+*/
+
+
+#if defined(__i386__) || defined(__amd64__)
# define HAVE_FULL_EXCHANGE_INSN
static inline void spin_loop(void) { asm("pause" : : : "memory"); }
+ static inline void write_fence(void) { asm("" : : : "memory"); }
#else
-# warn "Add a correct definition of spin_loop() for this platform?"
static inline void spin_loop(void) { asm("" : : : "memory"); }
+ static inline void write_fence(void) { __sync_synchronize(); }
#endif
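
A minimal usage sketch of the pairing described in the comment above,
assuming atomic.h is included; the names shared_data, shared_ready,
publish and consume are made up for illustration and are not part of
the commit:

    /* Sketch only: hypothetical shared slot and publish flag. */
    static int shared_data;
    static int shared_ready = 0;

    static void publish(int value)
    {
        shared_data = value;     /* past write */
        write_fence();           /* pushed to memory before the flag below */
        shared_ready = 1;        /* future write: the publish flag */
    }

    static int consume(int *out)
    {
        /* the CAS doubles as the assumed "read fence" before we read
           shared_data */
        if (__sync_bool_compare_and_swap(&shared_ready, 1, 0)) {
            *out = shared_data;
            return 1;
        }
        return 0;
    }

The reader's compare-and-swap is what supplies the assumed "read
fence", so a plain flag store on the writer side is enough once
write_fence() has executed.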
diff --git a/c7/stm/pagecopy.c b/c7/stm/pagecopy.c
new file mode 100644
--- /dev/null
+++ b/c7/stm/pagecopy.c
@@ -0,0 +1,57 @@
+
+static void pagecopy(void *dest, const void *src)
+{
+ unsigned long i;
+ for (i=0; i<4096/128; i++) {
+ asm volatile("movdqa (%0), %%xmm0\n"
+ "movdqa 16(%0), %%xmm1\n"
+ "movdqa 32(%0), %%xmm2\n"
+ "movdqa 48(%0), %%xmm3\n"
+ "movdqa %%xmm0, (%1)\n"
+ "movdqa %%xmm1, 16(%1)\n"
+ "movdqa %%xmm2, 32(%1)\n"
+ "movdqa %%xmm3, 48(%1)\n"
+ "movdqa 64(%0), %%xmm0\n"
+ "movdqa 80(%0), %%xmm1\n"
+ "movdqa 96(%0), %%xmm2\n"
+ "movdqa 112(%0), %%xmm3\n"
+ "movdqa %%xmm0, 64(%1)\n"
+ "movdqa %%xmm1, 80(%1)\n"
+ "movdqa %%xmm2, 96(%1)\n"
+ "movdqa %%xmm3, 112(%1)\n"
+ :
+ : "r"(src + 128*i), "r"(dest + 128*i)
+ : "xmm0", "xmm1", "xmm2", "xmm3", "memory");
+ }
+}
+
+#if 0 /* XXX enable if detected on the cpu */
+static void pagecopy_ymm8(void *dest, const void *src)
+{
+ asm volatile("0:\n"
+ "vmovdqa (%0), %%ymm0\n"
+ "vmovdqa 32(%0), %%ymm1\n"
+ "vmovdqa 64(%0), %%ymm2\n"
+ "vmovdqa 96(%0), %%ymm3\n"
+ "vmovdqa 128(%0), %%ymm4\n"
+ "vmovdqa 160(%0), %%ymm5\n"
+ "vmovdqa 192(%0), %%ymm6\n"
+ "vmovdqa 224(%0), %%ymm7\n"
+ "addq $256, %0\n"
+ "vmovdqa %%ymm0, (%1)\n"
+ "vmovdqa %%ymm1, 32(%1)\n"
+ "vmovdqa %%ymm2, 64(%1)\n"
+ "vmovdqa %%ymm3, 96(%1)\n"
+ "vmovdqa %%ymm4, 128(%1)\n"
+ "vmovdqa %%ymm5, 160(%1)\n"
+ "vmovdqa %%ymm6, 192(%1)\n"
+ "vmovdqa %%ymm7, 224(%1)\n"
+ "addq $256, %1\n"
+ "cmpq %2, %0\n"
+ "jne 0b"
+ : "=r"(src), "=r"(dest)
+ : "r"((char *)src + 4096), "0"(src), "1"(dest)
+ : "xmm0", "xmm1", "xmm2", "xmm3",
+ "xmm4", "xmm5", "xmm6", "xmm7");
+}
+#endif
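
For comparison only (not part of the commit), a plain-C fallback with
a made-up name could copy the same 4096-byte page without SSE,
assuming both pointers are page-aligned like the movdqa version
requires:

    #include <stdint.h>

    /* Sketch: copy one 4096-byte page with plain 64-bit loads and
       stores; assumes dest and src are page-aligned, like the SSE
       version above. */
    static void pagecopy_portable(void *dest, const void *src)
    {
        uint64_t *d = (uint64_t *)dest;
        const uint64_t *s = (const uint64_t *)src;
        unsigned long i;
        for (i = 0; i < 4096 / sizeof(uint64_t); i++)
            d[i] = s[i];
    }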
diff --git a/c7/stm/pagecopy.h b/c7/stm/pagecopy.h
new file mode 100644
--- /dev/null
+++ b/c7/stm/pagecopy.h
@@ -0,0 +1,2 @@
+
+static void pagecopy(void *dest, const void *src);
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -58,11 +58,11 @@
ssize_t pgoff1 = pagenum;
ssize_t pgoff2 = pagenum + NB_PAGES;
- ssize_t localpgoff = pgoff1 + NB_PAGES * _STM_TL->thread_num;
- ssize_t otherpgoff = pgoff1 + NB_PAGES * (1 - _STM_TL->thread_num);
+ ssize_t localpgoff = pgoff1 + NB_PAGES * STM_SEGMENT->segment_num;
+ ssize_t otherpgoff = pgoff1 + NB_PAGES * (1 - STM_SEGMENT->segment_num);
- void *localpg = object_pages + localpgoff * 4096UL;
- void *otherpg = object_pages + otherpgoff * 4096UL;
+ void *localpg = stm_object_pages + localpgoff * 4096UL;
+ void *otherpg = stm_object_pages + otherpgoff * 4096UL;
// XXX should not use pgoff2, but instead the next unused page in
// thread 2, so that after major GCs the next dirty pages are the
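
For illustration, the address arithmetic in the hunk above boils down
to the following sketch (helper name made up; it assumes two segments
of NB_PAGES pages each, mapped back to back starting at
stm_object_pages):

    #include <stdint.h>
    #include <sys/types.h>

    /* Illustrative only: address of page `pagenum` inside a given
       segment, assuming segments of nb_pages pages mapped
       consecutively starting at object_pages. */
    static char *page_addr(char *object_pages, uintptr_t pagenum,
                           long segment_num, uintptr_t nb_pages)
    {
        ssize_t pgoff = pagenum + nb_pages * segment_num;
        return object_pages + pgoff * 4096UL;
    }

With two segments numbered 0 and 1, the other copy of the same page is
simply page_addr(..., 1 - segment_num, ...), which is the
localpg/otherpg pair computed above.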
diff --git a/c7/stmgc.c b/c7/stmgc.c
--- a/c7/stmgc.c
+++ b/c7/stmgc.c
@@ -3,6 +3,7 @@
#include "stm/atomic.h"
#include "stm/list.h"
#include "stm/core.h"
+#include "stm/pagecopy.h"
#include "stm/pages.h"
#include "stm/gcpage.h"
#include "stm/sync.h"
@@ -10,6 +11,7 @@
#include "stm/misc.c"
#include "stm/list.c"
+#include "stm/pagecopy.c"
#include "stm/pages.c"
#include "stm/prebuilt.c"
#include "stm/gcpage.c"