Author: Armin Rigo <[email protected]>
Branch: c7-refactor
Changeset: r736:3906e3d066b4
Date: 2014-02-14 18:30 +0100
http://bitbucket.org/pypy/stmgc/changeset/3906e3d066b4/
Log: In-progress: redoing the page privatization
diff --git a/c7/stm/atomic.h b/c7/stm/atomic.h
--- a/c7/stm/atomic.h
+++ b/c7/stm/atomic.h
@@ -1,12 +1,34 @@
-#if defined(__i386__) || defined(__x86_64__)
+/* spin_loop() corresponds to the PAUSE instruction on x86.  On
+   other architectures we emit no instruction (but still need the
+   compiler barrier); if you know the corresponding instruction for
+   another architecture, feel free to add it here.
+*/
+
+/* write_fence() inserts a "write fence": it makes sure that past
+   writes are really pushed to memory before any future writes.  We
+   assume that the matching "read fence" effect is provided
+   automatically by a corresponding
+   __sync_bool_compare_and_swap().
+
+   On x86 the CPU does this automatically; we only need a compiler
+   barrier (asm("" : : : "memory")).
+
+   On other architectures, we use __sync_synchronize() as a general
+   fall-back, but some platforms may have a more efficient
+   alternative.
+*/
+
+
+#if defined(__i386__) || defined(__amd64__)
# define HAVE_FULL_EXCHANGE_INSN
static inline void spin_loop(void) { asm("pause" : : : "memory"); }
+ static inline void write_fence(void) { asm("" : : : "memory"); }
#else
-# warn "Add a correct definition of spin_loop() for this platform?"
static inline void spin_loop(void) { asm("" : : : "memory"); }
+ static inline void write_fence(void) { __sync_synchronize(); }
#endif
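
A minimal usage sketch of the pairing described in the comment above,
assuming atomic.h is included; the names shared_data, shared_ready,
publish and consume are made up for illustration and are not part of
the commit:

    /* Sketch only: hypothetical shared slot and publish flag. */
    static int shared_data;
    static int shared_ready = 0;

    static void publish(int value)
    {
        shared_data = value;     /* past write */
        write_fence();           /* pushed to memory before the flag below */
        shared_ready = 1;        /* future write: the publish flag */
    }

    static int consume(int *out)
    {
        /* the CAS doubles as the assumed "read fence" before we read
           shared_data */
        if (__sync_bool_compare_and_swap(&shared_ready, 1, 0)) {
            *out = shared_data;
            return 1;
        }
        return 0;
    }

The reader's compare-and-swap is what supplies the assumed "read
fence", so a plain flag store on the writer side is enough once
write_fence() has executed.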
diff --git a/c7/stm/pagecopy.c b/c7/stm/pagecopy.c
new file mode 100644
--- /dev/null
+++ b/c7/stm/pagecopy.c
@@ -0,0 +1,57 @@
+
+static void pagecopy(void *dest, const void *src)
+{
+ unsigned long i;
+ for (i=0; i<4096/128; i++) {
+ asm volatile("movdqa (%0), %%xmm0\n"
+ "movdqa 16(%0), %%xmm1\n"
+ "movdqa 32(%0), %%xmm2\n"
+ "movdqa 48(%0), %%xmm3\n"
+ "movdqa %%xmm0, (%1)\n"
+ "movdqa %%xmm1, 16(%1)\n"
+ "movdqa %%xmm2, 32(%1)\n"
+ "movdqa %%xmm3, 48(%1)\n"
+ "movdqa 64(%0), %%xmm0\n"
+ "movdqa 80(%0), %%xmm1\n"
+ "movdqa 96(%0), %%xmm2\n"
+ "movdqa 112(%0), %%xmm3\n"
+ "movdqa %%xmm0, 64(%1)\n"
+ "movdqa %%xmm1, 80(%1)\n"
+ "movdqa %%xmm2, 96(%1)\n"
+ "movdqa %%xmm3, 112(%1)\n"
+ :
+ : "r"(src + 128*i), "r"(dest + 128*i)
+ : "xmm0", "xmm1", "xmm2", "xmm3", "memory");
+ }
+}
+
+#if 0 /* XXX enable if detected on the cpu */
+static void pagecopy_ymm8(void *dest, const void *src)
+{
+ asm volatile("0:\n"
+ "vmovdqa (%0), %%ymm0\n"
+ "vmovdqa 32(%0), %%ymm1\n"
+ "vmovdqa 64(%0), %%ymm2\n"
+ "vmovdqa 96(%0), %%ymm3\n"
+ "vmovdqa 128(%0), %%ymm4\n"
+ "vmovdqa 160(%0), %%ymm5\n"
+ "vmovdqa 192(%0), %%ymm6\n"
+ "vmovdqa 224(%0), %%ymm7\n"
+ "addq $256, %0\n"
+ "vmovdqa %%ymm0, (%1)\n"
+ "vmovdqa %%ymm1, 32(%1)\n"
+ "vmovdqa %%ymm2, 64(%1)\n"
+ "vmovdqa %%ymm3, 96(%1)\n"
+ "vmovdqa %%ymm4, 128(%1)\n"
+ "vmovdqa %%ymm5, 160(%1)\n"
+ "vmovdqa %%ymm6, 192(%1)\n"
+ "vmovdqa %%ymm7, 224(%1)\n"
+ "addq $256, %1\n"
+ "cmpq %2, %0\n"
+ "jne 0b"
+ : "=r"(src), "=r"(dest)
+ : "r"((char *)src + 4096), "0"(src), "1"(dest)
+ : "xmm0", "xmm1", "xmm2", "xmm3",
+ "xmm4", "xmm5", "xmm6", "xmm7");
+}
+#endif
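
For comparison only (not part of the commit), a plain-C fallback with
a made-up name could copy the same 4096-byte page without SSE,
assuming both pointers are page-aligned like the movdqa version
requires:

    #include <stdint.h>

    /* Sketch: copy one 4096-byte page with plain 64-bit loads and
       stores; assumes dest and src are page-aligned, like the SSE
       version above. */
    static void pagecopy_portable(void *dest, const void *src)
    {
        uint64_t *d = (uint64_t *)dest;
        const uint64_t *s = (const uint64_t *)src;
        unsigned long i;
        for (i = 0; i < 4096 / sizeof(uint64_t); i++)
            d[i] = s[i];
    }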
diff --git a/c7/stm/pagecopy.h b/c7/stm/pagecopy.h
new file mode 100644
--- /dev/null
+++ b/c7/stm/pagecopy.h
@@ -0,0 +1,2 @@
+
+static void pagecopy(void *dest, const void *src);
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -58,11 +58,11 @@
ssize_t pgoff1 = pagenum;
ssize_t pgoff2 = pagenum + NB_PAGES;
- ssize_t localpgoff = pgoff1 + NB_PAGES * _STM_TL->thread_num;
- ssize_t otherpgoff = pgoff1 + NB_PAGES * (1 - _STM_TL->thread_num);
+ ssize_t localpgoff = pgoff1 + NB_PAGES * STM_SEGMENT->segment_num;
+ ssize_t otherpgoff = pgoff1 + NB_PAGES * (1 - STM_SEGMENT->segment_num);
- void *localpg = object_pages + localpgoff * 4096UL;
- void *otherpg = object_pages + otherpgoff * 4096UL;
+ void *localpg = stm_object_pages + localpgoff * 4096UL;
+ void *otherpg = stm_object_pages + otherpgoff * 4096UL;
// XXX should not use pgoff2, but instead the next unused page in
// thread 2, so that after major GCs the next dirty pages are the
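
For illustration, the address arithmetic in the hunk above boils down
to the following sketch (helper name made up; it assumes two segments
of NB_PAGES pages each, mapped back to back starting at
stm_object_pages):

    #include <stdint.h>
    #include <sys/types.h>

    /* Illustrative only: address of page `pagenum` inside a given
       segment, assuming segments of nb_pages pages mapped
       consecutively starting at object_pages. */
    static char *page_addr(char *object_pages, uintptr_t pagenum,
                           long segment_num, uintptr_t nb_pages)
    {
        ssize_t pgoff = pagenum + nb_pages * segment_num;
        return object_pages + pgoff * 4096UL;
    }

With two segments numbered 0 and 1, the other copy of the same page is
simply page_addr(..., 1 - segment_num, ...), which is the
localpg/otherpg pair computed above.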
diff --git a/c7/stmgc.c b/c7/stmgc.c
--- a/c7/stmgc.c
+++ b/c7/stmgc.c
@@ -3,6 +3,7 @@
#include "stm/atomic.h"
#include "stm/list.h"
#include "stm/core.h"
+#include "stm/pagecopy.h"
#include "stm/pages.h"
#include "stm/gcpage.h"
#include "stm/sync.h"
@@ -10,6 +11,7 @@
#include "stm/misc.c"
#include "stm/list.c"
+#include "stm/pagecopy.c"
#include "stm/pages.c"
#include "stm/prebuilt.c"
#include "stm/gcpage.c"