Author: Armin Rigo <[email protected]>
Branch: c7-refactor
Changeset: r719:b107802731ce
Date: 2014-02-10 18:00 +0100
http://bitbucket.org/pypy/stmgc/changeset/b107802731ce/
Log: in-progress
diff --git a/c7/pages.c b/c7/pages.c
deleted file mode 100644
--- a/c7/pages.c
+++ /dev/null
@@ -1,157 +0,0 @@
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <asm/prctl.h>
-#include <sys/prctl.h>
-#include <pthread.h>
-
-
-#include "core.h"
-#include "list.h"
-#include "pages.h"
-#include "pagecopy.h"
-
-
-#if defined(__i386__) || defined(__x86_64__)
-# define HAVE_FULL_EXCHANGE_INSN
-#endif
-
-
-uintptr_t index_page_never_used;
-uint8_t flag_page_private[NB_PAGES];
-
-volatile uint8_t list_lock = 0;
-struct stm_list_s *single_page_list;
-
-
-void _stm_reset_pages()
-{
- assert(!list_lock);
- if (!single_page_list)
- single_page_list = stm_list_create();
- else
- stm_list_clear(single_page_list);
-
- index_page_never_used = FIRST_AFTER_NURSERY_PAGE;
-
- memset(flag_page_private, 0, sizeof(flag_page_private));
-}
-
-uint8_t stm_get_page_flag(int pagenum)
-{
- return flag_page_private[pagenum];
-}
-
-void stm_set_page_flag(int pagenum, uint8_t flag)
-{
- assert(flag_page_private[pagenum] != flag);
- flag_page_private[pagenum] = flag;
-}
-
-
-void stm_pages_privatize(uintptr_t pagenum)
-{
- if (flag_page_private[pagenum] == PRIVATE_PAGE)
- return;
-
-#ifdef HAVE_FULL_EXCHANGE_INSN
- /* use __sync_lock_test_and_set() as a cheaper alternative to
- __sync_bool_compare_and_swap(). */
- int previous = __sync_lock_test_and_set(&flag_page_private[pagenum],
- REMAPPING_PAGE);
- if (previous == PRIVATE_PAGE) {
- flag_page_private[pagenum] = PRIVATE_PAGE;
- return;
- }
- bool was_shared = (previous == SHARED_PAGE);
-#else
- bool was_shared = __sync_bool_compare_and_swap(&flag_page_private[pagenum],
- SHARED_PAGE, REMAPPING_PAGE);
-#endif
- if (!was_shared) {
- while (1) {
- uint8_t state = ((uint8_t volatile *)flag_page_private)[pagenum];
- if (state != REMAPPING_PAGE) {
- assert(state == PRIVATE_PAGE);
- break;
- }
- spin_loop();
- }
- return;
- }
-
- ssize_t pgoff1 = pagenum;
- ssize_t pgoff2 = pagenum + NB_PAGES;
- ssize_t localpgoff = pgoff1 + NB_PAGES * _STM_TL->thread_num;
- ssize_t otherpgoff = pgoff1 + NB_PAGES * (1 - _STM_TL->thread_num);
-
- void *localpg = object_pages + localpgoff * 4096UL;
- void *otherpg = object_pages + otherpgoff * 4096UL;
-
- // XXX should not use pgoff2, but instead the next unused page in
- // thread 2, so that after major GCs the next dirty pages are the
- // same as the old ones
- int res = remap_file_pages(localpg, 4096, 0, pgoff2, 0);
- if (res < 0) {
- perror("remap_file_pages");
- abort();
- }
- pagecopy(localpg, otherpg);
- write_fence();
- assert(flag_page_private[pagenum] == REMAPPING_PAGE);
- flag_page_private[pagenum] = PRIVATE_PAGE;
-}
-
-
-
-uintptr_t stm_pages_reserve(int num)
-{
- /* grab free, possibly uninitialized pages */
- if (num == 1 && !stm_list_is_empty(single_page_list)) {
- uint8_t previous;
- while ((previous = __sync_lock_test_and_set(&list_lock, 1)))
- spin_loop();
-
- if (!stm_list_is_empty(single_page_list)) {
- uintptr_t res = (uintptr_t)stm_list_pop_item(single_page_list);
- list_lock = 0;
- return res;
- }
-
- list_lock = 0;
- }
-
- /* Return the index'th object page, which is so far never used. */
- uintptr_t index = __sync_fetch_and_add(&index_page_never_used, num);
-
- int i;
- for (i = 0; i < num; i++) {
- assert(flag_page_private[index+i] == SHARED_PAGE);
- }
-
- if (index + num >= NB_PAGES) {
- fprintf(stderr, "Out of mmap'ed memory!\n");
- abort();
- }
- return index;
-}
-
-void stm_pages_unreserve(uintptr_t pagenum)
-{
- uint8_t previous;
- while ((previous = __sync_lock_test_and_set(&list_lock, 1)))
- spin_loop();
-
- flag_page_private[pagenum] = SHARED_PAGE;
- LIST_APPEND(single_page_list, (object_t*)pagenum);
-
- list_lock = 0;
-}
-
-
-
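For reference, the deleted stm_pages_privatize() reduces to the following
standalone sketch of its SHARED -> REMAPPING -> PRIVATE state machine, using
the portable compare-and-swap variant of the protocol. do_the_remapping(),
privatize() and worker() are illustrative stand-ins: the real
remap_file_pages()/pagecopy() work is replaced by a stub and spin_loop() by
sched_yield(), so only the synchronization itself is exercised.

    #include <assert.h>
    #include <pthread.h>
    #include <sched.h>
    #include <stdint.h>
    #include <stdio.h>

    enum { SHARED_PAGE = 0, REMAPPING_PAGE, PRIVATE_PAGE };

    static volatile uint8_t page_flag = SHARED_PAGE;

    static void do_the_remapping(void)
    {
        /* stand-in for remap_file_pages() + pagecopy() */
    }

    static void privatize(void)
    {
        if (page_flag == PRIVATE_PAGE)
            return;                          /* already private */

        if (__sync_bool_compare_and_swap(&page_flag, SHARED_PAGE,
                                         REMAPPING_PAGE)) {
            do_the_remapping();              /* we won the race */
            __sync_synchronize();            /* write fence, then publish */
            page_flag = PRIVATE_PAGE;
        }
        else {
            /* somebody else is remapping: wait until they are done */
            while (page_flag != PRIVATE_PAGE)
                sched_yield();
        }
    }

    static void *worker(void *arg)
    {
        (void)arg;
        privatize();
        assert(page_flag == PRIVATE_PAGE);
        return NULL;
    }

    int main(void)
    {
        pthread_t t1, t2;
        pthread_create(&t1, NULL, worker, NULL);
        pthread_create(&t2, NULL, worker, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        printf("final state: %d (PRIVATE_PAGE=%d)\n",
               page_flag, PRIVATE_PAGE);
        return 0;
    }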
diff --git a/c7/pages.h b/c7/pages.h
deleted file mode 100644
--- a/c7/pages.h
+++ /dev/null
@@ -1,21 +0,0 @@
-enum {
- /* unprivatized page seen by all threads */
- SHARED_PAGE=0,
-
- /* page being in the process of privatization */
- REMAPPING_PAGE,
-
- /* page private for each thread */
- PRIVATE_PAGE,
-}; /* flag_page_private */
-
-
-void stm_pages_privatize(uintptr_t pagenum);
-uintptr_t stm_pages_reserve(int num);
-uint8_t stm_get_page_flag(int pagenum);
-void stm_set_page_flag(int pagenum, uint8_t flag);
-void _stm_reset_pages(void);
-void stm_pages_unreserve(uintptr_t num);
-
-
-
diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -8,9 +8,48 @@
abort();
}
+static void reset_transaction_read_version(void)
+{
+ /* force-reset all read markers to 0 */
+
+ /* XXX measure the time taken by this madvise() and the following
+ zeroing of pages done lazily by the kernel; compare it with using
+ 16-bit read_versions.
+ */
+    /* XXX try to use madvise() on smaller ranges of memory.  In my
+       measurements we could gain a factor of 2, but not much more,
+       even if the range of virtual addresses below is very large, as
+       long as it consists mostly of non-reserved pages.  (The
+       following call keeps them non-reserved; apparently the kernel
+       just skips them very quickly.)
+    */
+ char *readmarkers = REAL_ADDRESS(STM_SEGMENT->segment_base,
+ FIRST_READMARKER_PAGE * 4096UL);
+ if (madvise(readmarkers, NB_READMARKER_PAGES * 4096UL,
+ MADV_DONTNEED) < 0) {
+ perror("madvise");
+ abort();
+ }
+ STM_SEGMENT->transaction_read_version = 1;
+}
+
void stm_start_transaction(stm_thread_local_t *tl, stm_jmpbuf_t *jmpbuf)
{
/* GS invalid before this point! */
- _stm_stop_safe_point(LOCK_COLLECT|THREAD_YIELD);
-
+ acquire_thread_segment(tl);
+
+ STM_SEGMENT->jmpbuf_ptr = jmpbuf;
+
+ uint8_t old_rv = STM_SEGMENT->transaction_read_version;
+ STM_SEGMENT->transaction_read_version = old_rv + 1;
+ if (UNLIKELY(old_rv == 0xff))
+ reset_transaction_read_version();
}
+
+
+void stm_commit_transaction(void)
+{
+ stm_thread_local_t *tl = STM_SEGMENT->running_thread;
+ release_thread_segment(tl);
+ abort();
+}
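The read-version scheme above can be tried out in isolation.  A minimal
sketch, assuming an anonymous mmap() of an arbitrary 16-page area stands in
for the segment's real read-marker pages; start_transaction() here is an
illustrative placeholder, not the real API:

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>

    #define MARKER_BYTES  (16 * 4096UL)

    static char *readmarkers;
    static uint8_t transaction_read_version;

    static void start_transaction(void)
    {
        uint8_t old_rv = transaction_read_version++;
        if (old_rv == 0xff) {
            /* the one-byte counter wrapped: lazily re-zero the pages */
            if (madvise(readmarkers, MARKER_BYTES, MADV_DONTNEED) < 0) {
                perror("madvise");
                abort();
            }
            transaction_read_version = 1;    /* 0 means "never read" */
        }
    }

    int main(void)
    {
        readmarkers = mmap(NULL, MARKER_BYTES, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (readmarkers == MAP_FAILED) {
            perror("mmap");
            abort();
        }
        readmarkers[42] = (char)0xff;    /* pretend some object was read */
        int i;
        for (i = 0; i < 256; i++)        /* enough to wrap the counter */
            start_transaction();
        printf("marker after reset: %d\n", readmarkers[42]);   /* 0 */
        return 0;
    }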
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
+#include <errno.h>
#define NB_PAGES (1500*256) // 1500MB
@@ -12,16 +13,14 @@
#define LARGE_OBJECT_WORDS 36
#define NB_NURSERY_PAGES 1024 // 4MB
-#define NURSERY_SECTION_SIZE (24*4096)
-
-
#define TOTAL_MEMORY (NB_PAGES * 4096UL * NB_SEGMENTS)
#define READMARKER_END ((NB_PAGES * 4096UL) >> 4)
#define FIRST_OBJECT_PAGE ((READMARKER_END + 4095) / 4096UL)
#define FIRST_NURSERY_PAGE FIRST_OBJECT_PAGE
+#define END_NURSERY_PAGE (FIRST_NURSERY_PAGE + NB_NURSERY_PAGES)
#define READMARKER_START ((FIRST_OBJECT_PAGE * 4096UL) >> 4)
#define FIRST_READMARKER_PAGE (READMARKER_START / 4096UL)
-#define END_NURSERY_PAGE (FIRST_NURSERY_PAGE + NB_NURSERY_PAGES)
+#define NB_READMARKER_PAGES (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE)
enum {
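A quick standalone check of the layout arithmetic above: with one read-marker
byte per 16 bytes of segment, the markers occupy the first 1/16th of the
address space and the object pages start right after them.

    #include <stdio.h>

    #define NB_PAGES              (1500*256)   /* 1500MB of 4096-byte pages */
    #define READMARKER_END        ((NB_PAGES * 4096UL) >> 4)
    #define FIRST_OBJECT_PAGE     ((READMARKER_END + 4095) / 4096UL)
    #define READMARKER_START      ((FIRST_OBJECT_PAGE * 4096UL) >> 4)
    #define FIRST_READMARKER_PAGE (READMARKER_START / 4096UL)
    #define NB_READMARKER_PAGES   (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE)

    int main(void)
    {
        /* prints 24000, 1500 and 22500 with the constants above */
        printf("FIRST_OBJECT_PAGE     = %lu\n", FIRST_OBJECT_PAGE);
        printf("FIRST_READMARKER_PAGE = %lu\n", FIRST_READMARKER_PAGE);
        printf("NB_READMARKER_PAGES   = %lu\n", NB_READMARKER_PAGES);
        /* the marker byte for an address 'p' lives at p >> 4, which
           always falls inside [READMARKER_START, READMARKER_END) for
           p in the object pages */
        return 0;
    }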
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -3,12 +3,6 @@
#endif
-stm_char *_stm_allocate_slowpath(ssize_t size_rounded_up)
-{
- abort();
-}
-
-
object_t *stm_allocate_prebuilt(ssize_t size_rounded_up)
{
abort();
diff --git a/c7/stm/misc.c b/c7/stm/misc.c
--- a/c7/stm/misc.c
+++ b/c7/stm/misc.c
@@ -24,6 +24,17 @@
return (object_t*)res;
}
+struct stm_priv_segment_info_s *_stm_segment(void)
+{
+ char *info = REAL_ADDRESS(STM_SEGMENT->segment_base, STM_PSEGMENT);
+ return (struct stm_priv_segment_info_s *)info;
+}
+
+stm_thread_local_t *_stm_thread(void)
+{
+ return STM_SEGMENT->running_thread;
+}
+
bool _stm_was_read(object_t *obj)
{
return ((stm_read_marker_t *)(((uintptr_t)obj) >> 4))->rm ==
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
new file mode 100644
--- /dev/null
+++ b/c7/stm/nursery.c
@@ -0,0 +1,63 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+/************************************************************/
+
+#define NURSERY_SIZE (NB_NURSERY_PAGES * 4096UL)
+
+/* an object larger than LARGE_OBJECT will never be allocated in
+ the nursery. */
+#define LARGE_OBJECT (65*1024)
+
+/* the nursery is divided into "sections" of this size.  Each
+   section is allocated to a single running thread. */
+#define NURSERY_SECTION_SIZE (128*1024)
+
+/* if objects are larger than this limit but smaller than LARGE_OBJECT,
+   then they might be allocated outside sections but still in the nursery. */
+#define MEDIUM_OBJECT (9*1024)
+
+/************************************************************/
+
+static union {
+ struct {
+ uint64_t used; /* number of bytes from the nursery used so far */
+ };
+ char reserved[64];
+} nursery_ctl __attribute__((aligned(64)));
+
+/************************************************************/
+
+static void setup_nursery(void)
+{
+ assert(MEDIUM_OBJECT < LARGE_OBJECT);
+ assert(LARGE_OBJECT < NURSERY_SECTION_SIZE);
+ nursery_ctl.used = 0;
+}
+
+
+static stm_char *allocate_from_nursery(uint64_t bytes)
+{
+ /* thread-safe; allocate a chunk of memory from the nursery */
+ uint64_t p = __sync_fetch_and_add(&nursery_ctl.used, bytes);
+ if (p + bytes > NURSERY_SIZE) {
+ //major_collection();
+ abort();
+ }
+ return (stm_char *)(FIRST_NURSERY_PAGE * 4096UL + p);
+}
+
+
+stm_char *_stm_allocate_slowpath(ssize_t size_rounded_up)
+{
+ if (size_rounded_up < MEDIUM_OBJECT) {
+ /* This is a small object. The current section is simply full.
+ Allocate the next section. */
+ stm_char *p = allocate_from_nursery(NURSERY_SECTION_SIZE);
+ STM_SEGMENT->nursery_current = p + size_rounded_up;
+ STM_SEGMENT->nursery_section_end = (uintptr_t)p + NURSERY_SECTION_SIZE;
+ return p;
+ }
+ abort();
+}
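The two-level scheme above (a global fetch-and-add cursor handing out whole
sections, plus a per-thread bump pointer inside the current section) can be
sketched in isolation; allocate() is a placeholder, and the __thread
variables stand in for STM_SEGMENT->nursery_current and
STM_SEGMENT->nursery_section_end:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define NURSERY_SIZE          (1024 * 4096UL)   /* 4MB, as above */
    #define NURSERY_SECTION_SIZE  (128 * 1024UL)

    static uint64_t nursery_used;                     /* shared cursor */
    static __thread uintptr_t current, section_end;   /* per-thread window */

    static uintptr_t allocate(uint64_t bytes)
    {
        if (current + bytes > section_end) {
            /* slow path: atomically reserve a whole new section */
            uint64_t p = __sync_fetch_and_add(&nursery_used,
                                              NURSERY_SECTION_SIZE);
            if (p + NURSERY_SECTION_SIZE > NURSERY_SIZE) {
                fprintf(stderr, "nursery full: would collect here\n");
                abort();
            }
            current = p;
            section_end = p + NURSERY_SECTION_SIZE;
        }
        uintptr_t result = current;       /* fast path: a plain bump */
        current += bytes;
        return result;
    }

    int main(void)
    {
        uintptr_t a = allocate(48);
        uintptr_t b = allocate(48);
        printf("a=%lu b=%lu (consecutive)\n",
               (unsigned long)a, (unsigned long)b);
        return 0;
    }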
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -19,9 +19,8 @@
abort();
}
}
- for (; count > 0; count--) {
- flag_page_private[pagenum++] = SHARED_PAGE;
- }
+ for (i = 0; i < count; i++)
+ flag_page_private[pagenum + i] = SHARED_PAGE;
}
static void _pages_privatize(uintptr_t pagenum, uintptr_t count)
diff --git a/c7/stm/setup.c b/c7/stm/setup.c
--- a/c7/stm/setup.c
+++ b/c7/stm/setup.c
@@ -59,6 +59,9 @@
long time for each page. */
pages_initialize_shared(FIRST_NURSERY_PAGE, NB_NURSERY_PAGES);
+ setup_sync();
+ setup_nursery();
+
#if 0
stm_largemalloc_init(heap, HEAP_PAGES * 4096UL);
#endif
@@ -72,6 +75,8 @@
stm_object_pages = NULL;
memset(flag_page_private, 0, sizeof(flag_page_private));
+
+ teardown_sync();
}
void stm_register_thread_local(stm_thread_local_t *tl)
@@ -85,7 +90,7 @@
stm_thread_locals->prev->next = tl;
stm_thread_locals->prev = tl;
}
- tl->associated_segment = get_segment(0);
+ tl->associated_segment_num = -1;
}
void stm_unregister_thread_local(stm_thread_local_t *tl)
@@ -99,4 +104,7 @@
}
tl->prev->next = tl->next;
tl->next->prev = tl->prev;
+ tl->prev = NULL;
+ tl->next = NULL;
+ tl->associated_segment_num = -1;
}
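The list manipulation in stm_register_thread_local() and
stm_unregister_thread_local() is the usual circular doubly-linked pattern.  A
minimal self-contained version, with a placeholder node_t in place of
stm_thread_local_t:

    #include <assert.h>
    #include <stddef.h>

    typedef struct node_s {
        struct node_s *prev, *next;
    } node_t;

    static node_t *head = NULL;

    static void register_node(node_t *n)
    {
        if (head == NULL) {
            head = n;
            n->prev = n;
            n->next = n;          /* a one-element circular list */
        }
        else {
            n->next = head;       /* append just before the head */
            n->prev = head->prev;
            head->prev->next = n;
            head->prev = n;
        }
    }

    static void unregister_node(node_t *n)
    {
        if (n == head) {
            head = n->next;
            if (head == n)
                head = NULL;      /* it was the last element */
        }
        n->prev->next = n->next;  /* splice out, then clear the links */
        n->next->prev = n->prev;
        n->prev = NULL;
        n->next = NULL;
    }

    int main(void)
    {
        node_t a, b;
        register_node(&a);
        register_node(&b);
        assert(a.next == &b && b.next == &a);
        unregister_node(&a);
        assert(head == &b && b.next == &b);
        unregister_node(&b);
        assert(head == NULL);
        return 0;
    }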
diff --git a/c7/stm/sync.c b/c7/stm/sync.c
new file mode 100644
--- /dev/null
+++ b/c7/stm/sync.c
@@ -0,0 +1,82 @@
+#include <semaphore.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+#include <asm/prctl.h>
+
+
+static union {
+ struct {
+ sem_t semaphore;
+ uint8_t in_use[NB_SEGMENTS]; /* 1 if running a pthread */
+ };
+ char reserved[64];
+} segments_ctl __attribute__((aligned(64)));
+
+
+static void setup_sync(void)
+{
+ memset(segments_ctl.in_use, 0, sizeof(segments_ctl.in_use));
+ if (sem_init(&segments_ctl.semaphore, 0, NB_SEGMENTS) != 0) {
+ perror("sem_init");
+ abort();
+ }
+}
+
+static void teardown_sync(void)
+{
+ if (sem_destroy(&segments_ctl.semaphore) != 0) {
+ perror("sem_destroy");
+ abort();
+ }
+}
+
+static void set_gs_register(char *value)
+{
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, (uint64_t)value) != 0) {
+ perror("syscall(arch_prctl, ARCH_SET_GS)");
+ abort();
+ }
+}
+
+static void acquire_thread_segment(stm_thread_local_t *tl)
+{
+ /* This function acquires a segment for the currently running thread,
+       and sets up the GS register if it changed. */
+ while (sem_wait(&segments_ctl.semaphore) != 0) {
+ if (errno != EINTR) {
+ perror("sem_wait");
+ abort();
+ }
+ }
+ int num = tl->associated_segment_num;
+ if (num >= 0) {
+ if (__sync_lock_test_and_set(&segments_ctl.in_use[num], 1) == 0) {
+        /* fast-path: we reacquired the same segment number as the one
+           we had before.  The value stored in GS is still valid. */
+ goto exit;
+ }
+ }
+ /* Look for the next free segment. There must be one, because we
+ acquired the semaphore above. */
+ while (1) {
+ num = (num + 1) % NB_SEGMENTS;
+ if (__sync_lock_test_and_set(&segments_ctl.in_use[num], 1) == 0)
+ break;
+ }
+ tl->associated_segment_num = num;
+ set_gs_register(get_segment_base(num));
+
+ exit:
+ assert(STM_SEGMENT->running_thread == NULL);
+ STM_SEGMENT->running_thread = tl;
+}
+
+static void release_thread_segment(stm_thread_local_t *tl)
+{
+ assert(STM_SEGMENT->running_thread == tl);
+ STM_SEGMENT->running_thread = NULL;
+
+ int num = tl->associated_segment_num;
+ __sync_lock_release(&segments_ctl.in_use[num]);
+ sem_post(&segments_ctl.semaphore);
+}
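The acquire/release pair above is easiest to see end-to-end in a small stress
test.  A sketch assuming POSIX semaphores and the same __sync builtins;
acquire_segment() and release_segment() are illustrative stand-ins for the
static functions above:

    #include <errno.h>
    #include <pthread.h>
    #include <semaphore.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define NB_SEG 2

    static sem_t sem;
    static uint8_t in_use[NB_SEG];

    static int acquire_segment(int last_num)
    {
        while (sem_wait(&sem) != 0) {
            if (errno != EINTR) { perror("sem_wait"); abort(); }
        }
        int num = last_num;
        if (num >= 0 &&
            __sync_lock_test_and_set(&in_use[num], 1) == 0)
            return num;                 /* fast path: same slot again */
        while (1) {                     /* otherwise scan for a free one;
                                           the semaphore guarantees one */
            num = (num + 1) % NB_SEG;
            if (__sync_lock_test_and_set(&in_use[num], 1) == 0)
                return num;
        }
    }

    static void release_segment(int num)
    {
        __sync_lock_release(&in_use[num]);
        sem_post(&sem);
    }

    static void *worker(void *arg)
    {
        (void)arg;
        int seg = -1;
        int i;
        for (i = 0; i < 1000; i++) {
            seg = acquire_segment(seg);   /* like stm_start_transaction() */
            release_segment(seg);         /* like stm_commit_transaction() */
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t t[4];
        int i;
        if (sem_init(&sem, 0, NB_SEG) != 0) { perror("sem_init"); abort(); }
        for (i = 0; i < 4; i++)
            pthread_create(&t[i], NULL, worker, NULL);
        for (i = 0; i < 4; i++)
            pthread_join(t[i], NULL);
        puts("ok");
        return 0;
    }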
diff --git a/c7/stm/sync.h b/c7/stm/sync.h
new file mode 100644
--- /dev/null
+++ b/c7/stm/sync.h
@@ -0,0 +1,8 @@
+
+
+static void setup_sync(void);
+static void teardown_sync(void);
+
+/* acquire and release one of the segments for running the given thread */
+static void acquire_thread_segment(stm_thread_local_t *tl);
+static void release_thread_segment(stm_thread_local_t *tl);
diff --git a/c7/stmgc.c b/c7/stmgc.c
--- a/c7/stmgc.c
+++ b/c7/stmgc.c
@@ -2,9 +2,12 @@
#include "stmgc.h"
#include "stm/core.h"
#include "stm/pages.h"
+#include "stm/sync.h"
#include "stm/misc.c"
-#include "stm/core.c"
#include "stm/pages.c"
#include "stm/gcpage.c"
+#include "stm/nursery.c"
+#include "stm/sync.c"
#include "stm/setup.c"
+#include "stm/core.c"
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -55,7 +55,7 @@
/* every thread should handle the shadow stack itself */
object_t **shadowstack, **shadowstack_base;
/* the next fields are handled automatically by the library */
- struct stm_segment_info_s *associated_segment;
+ int associated_segment_num;
struct stm_thread_local_s *prev, *next;
} stm_thread_local_t;