Author: Armin Rigo <[email protected]>
Branch: c7-refactor
Changeset: r719:b107802731ce
Date: 2014-02-10 18:00 +0100
http://bitbucket.org/pypy/stmgc/changeset/b107802731ce/

Log:    in-progress

diff --git a/c7/pages.c b/c7/pages.c
deleted file mode 100644
--- a/c7/pages.c
+++ /dev/null
@@ -1,157 +0,0 @@
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <asm/prctl.h>
-#include <sys/prctl.h>
-#include <pthread.h>
-
-
-#include "core.h"
-#include "list.h"
-#include "pages.h"
-#include "pagecopy.h"
-
-
-#if defined(__i386__) || defined(__x86_64__)
-#  define HAVE_FULL_EXCHANGE_INSN
-#endif
-
-
-uintptr_t index_page_never_used;
-uint8_t flag_page_private[NB_PAGES];
-
-volatile uint8_t list_lock = 0;
-struct stm_list_s *single_page_list;
-
-
-void _stm_reset_pages()
-{
-    assert(!list_lock);
-    if (!single_page_list)
-        single_page_list = stm_list_create();
-    else
-        stm_list_clear(single_page_list);
-
-    index_page_never_used = FIRST_AFTER_NURSERY_PAGE;
-    
-    memset(flag_page_private, 0, sizeof(flag_page_private));
-}
-
-uint8_t stm_get_page_flag(int pagenum)
-{
-    return flag_page_private[pagenum];
-}
-
-void stm_set_page_flag(int pagenum, uint8_t flag)
-{
-    assert(flag_page_private[pagenum] != flag);
-    flag_page_private[pagenum] = flag;
-}
-
-
-void stm_pages_privatize(uintptr_t pagenum)
-{
-    if (flag_page_private[pagenum] == PRIVATE_PAGE)
-        return;
-
-#ifdef HAVE_FULL_EXCHANGE_INSN
-    /* use __sync_lock_test_and_set() as a cheaper alternative to
-       __sync_bool_compare_and_swap(). */
-    int previous = __sync_lock_test_and_set(&flag_page_private[pagenum],
-                                            REMAPPING_PAGE);
-    if (previous == PRIVATE_PAGE) {
-        flag_page_private[pagenum] = PRIVATE_PAGE;
-        return;
-    }
-    bool was_shared = (previous == SHARED_PAGE);
-#else
-    bool was_shared = __sync_bool_compare_and_swap(&flag_page_private[pagenum],
-                                                  SHARED_PAGE, REMAPPING_PAGE);
-#endif
-    if (!was_shared) {
-        while (1) {
-            uint8_t state = ((uint8_t volatile *)flag_page_private)[pagenum];
-            if (state != REMAPPING_PAGE) {
-                assert(state == PRIVATE_PAGE);
-                break;
-            }
-            spin_loop();
-        }
-        return;
-    }
-
-    ssize_t pgoff1 = pagenum;
-    ssize_t pgoff2 = pagenum + NB_PAGES;
-    ssize_t localpgoff = pgoff1 + NB_PAGES * _STM_TL->thread_num;
-    ssize_t otherpgoff = pgoff1 + NB_PAGES * (1 - _STM_TL->thread_num);
-
-    void *localpg = object_pages + localpgoff * 4096UL;
-    void *otherpg = object_pages + otherpgoff * 4096UL;
-
-    // XXX should not use pgoff2, but instead the next unused page in
-    // thread 2, so that after major GCs the next dirty pages are the
-    // same as the old ones
-    int res = remap_file_pages(localpg, 4096, 0, pgoff2, 0);
-    if (res < 0) {
-        perror("remap_file_pages");
-        abort();
-    }
-    pagecopy(localpg, otherpg);
-    write_fence();
-    assert(flag_page_private[pagenum] == REMAPPING_PAGE);
-    flag_page_private[pagenum] = PRIVATE_PAGE;
-}
-
-
-
-uintptr_t stm_pages_reserve(int num)
-{
-    /* grab free, possibly uninitialized pages */
-    if (num == 1 && !stm_list_is_empty(single_page_list)) {
-        uint8_t previous;
-        while ((previous = __sync_lock_test_and_set(&list_lock, 1)))
-            spin_loop();
-        
-        if (!stm_list_is_empty(single_page_list)) {
-            uintptr_t res = (uintptr_t)stm_list_pop_item(single_page_list);
-            list_lock = 0;
-            return res;
-        }
-        
-        list_lock = 0;
-    }
-
-    /* Return the index'th object page, which is so far never used. */
-    uintptr_t index = __sync_fetch_and_add(&index_page_never_used, num);
-
-    int i;
-    for (i = 0; i < num; i++) {
-        assert(flag_page_private[index+i] == SHARED_PAGE);
-    }
-
-    if (index + num >= NB_PAGES) {
-        fprintf(stderr, "Out of mmap'ed memory!\n");
-        abort();
-    }
-    return index;
-}
-
-void stm_pages_unreserve(uintptr_t pagenum)
-{
-    uint8_t previous;
-    while ((previous = __sync_lock_test_and_set(&list_lock, 1)))
-        spin_loop();
-    
-    flag_page_private[pagenum] = SHARED_PAGE;
-    LIST_APPEND(single_page_list, (object_t*)pagenum);
-
-    list_lock = 0;
-}
-
-
-
diff --git a/c7/pages.h b/c7/pages.h
deleted file mode 100644
--- a/c7/pages.h
+++ /dev/null
@@ -1,21 +0,0 @@
-enum {
-    /* unprivatized page seen by all threads */
-    SHARED_PAGE=0,
-
-    /* page being in the process of privatization */
-    REMAPPING_PAGE,
-
-    /* page private for each thread */
-    PRIVATE_PAGE,
-};  /* flag_page_private */
-
-
-void stm_pages_privatize(uintptr_t pagenum);
-uintptr_t stm_pages_reserve(int num);
-uint8_t stm_get_page_flag(int pagenum);
-void stm_set_page_flag(int pagenum, uint8_t flag);
-void _stm_reset_pages(void);
-void stm_pages_unreserve(uintptr_t num);
-
-
-
diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -8,9 +8,48 @@
     abort();
 }
 
+static void reset_transaction_read_version(void)
+{
+    /* force-reset all read markers to 0 */
+
+    /* XXX measure the time taken by this madvise() and the following
+       zeroing of pages done lazily by the kernel; compare it with using
+       16-bit read_versions.
+    */
+    /* XXX try to use madvise() on smaller ranges of memory.  In my
+       measurements, we could gain a factor 2 --- not really more, even if
+       the range of virtual addresses below is very large, as long as it
+       is already mostly non-reserved pages.  (The following call keeps
+       them non-reserved; apparently the kernel just skips them very
+       quickly.)
+    */
+    char *readmarkers = REAL_ADDRESS(STM_SEGMENT->segment_base,
+                                     FIRST_READMARKER_PAGE * 4096UL);
+    if (madvise(readmarkers, NB_READMARKER_PAGES * 4096UL,
+                MADV_DONTNEED) < 0) {
+        perror("madvise");
+        abort();
+    }
+    STM_SEGMENT->transaction_read_version = 1;
+}
+
 void stm_start_transaction(stm_thread_local_t *tl, stm_jmpbuf_t *jmpbuf)
 {
     /* GS invalid before this point! */
-    _stm_stop_safe_point(LOCK_COLLECT|THREAD_YIELD);
-    
+    acquire_thread_segment(tl);
+
+    STM_SEGMENT->jmpbuf_ptr = jmpbuf;
+
+    uint8_t old_rv = STM_SEGMENT->transaction_read_version;
+    STM_SEGMENT->transaction_read_version = old_rv + 1;
+    if (UNLIKELY(old_rv == 0xff))
+        reset_transaction_read_version();
 }
+
+
+void stm_commit_transaction(void)
+{
+    stm_thread_local_t *tl = STM_SEGMENT->running_thread;
+    release_thread_segment(tl);
+    abort();
+}
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -4,6 +4,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <sys/mman.h>
+#include <errno.h>
 
 
 #define NB_PAGES            (1500*256)    // 1500MB
@@ -12,16 +13,14 @@
 #define LARGE_OBJECT_WORDS  36
 #define NB_NURSERY_PAGES    1024          // 4MB
 
-#define NURSERY_SECTION_SIZE  (24*4096)
-
-
 #define TOTAL_MEMORY          (NB_PAGES * 4096UL * NB_SEGMENTS)
 #define READMARKER_END        ((NB_PAGES * 4096UL) >> 4)
 #define FIRST_OBJECT_PAGE     ((READMARKER_END + 4095) / 4096UL)
 #define FIRST_NURSERY_PAGE    FIRST_OBJECT_PAGE
+#define END_NURSERY_PAGE      (FIRST_NURSERY_PAGE + NB_NURSERY_PAGES)
 #define READMARKER_START      ((FIRST_OBJECT_PAGE * 4096UL) >> 4)
 #define FIRST_READMARKER_PAGE (READMARKER_START / 4096UL)
-#define END_NURSERY_PAGE      (FIRST_NURSERY_PAGE + NB_NURSERY_PAGES)
+#define NB_READMARKER_PAGES   (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE)
 
 
 enum {
diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c
--- a/c7/stm/gcpage.c
+++ b/c7/stm/gcpage.c
@@ -3,12 +3,6 @@
 #endif
 
 
-stm_char *_stm_allocate_slowpath(ssize_t size_rounded_up)
-{
-    abort();
-}
-
-
 object_t *stm_allocate_prebuilt(ssize_t size_rounded_up)
 {
     abort();
diff --git a/c7/stm/misc.c b/c7/stm/misc.c
--- a/c7/stm/misc.c
+++ b/c7/stm/misc.c
@@ -24,6 +24,17 @@
     return (object_t*)res;
 }
 
+struct stm_priv_segment_info_s *_stm_segment(void)
+{
+    char *info = REAL_ADDRESS(STM_SEGMENT->segment_base, STM_PSEGMENT);
+    return (struct stm_priv_segment_info_s *)info;
+}
+
+stm_thread_local_t *_stm_thread(void)
+{
+    return STM_SEGMENT->running_thread;
+}
+
 bool _stm_was_read(object_t *obj)
 {
     return ((stm_read_marker_t *)(((uintptr_t)obj) >> 4))->rm ==
diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c
new file mode 100644
--- /dev/null
+++ b/c7/stm/nursery.c
@@ -0,0 +1,63 @@
+#ifndef _STM_CORE_H_
+# error "must be compiled via stmgc.c"
+#endif
+
+/************************************************************/
+
+#define NURSERY_SIZE          (NB_NURSERY_PAGES * 4096UL)
+
+/* an object larger than LARGE_OBJECT will never be allocated in
+   the nursery. */
+#define LARGE_OBJECT          (65*1024)
+
+/* the nursery is divided into "sections" this big.  Each section is
+   allocated to a single running thread. */
+#define NURSERY_SECTION_SIZE  (128*1024)
+
+/* if objects are larger than this limit but smaller than LARGE_OBJECT,
+   then they might be allocated outside sections but still in the nursery. */
+#define MEDIUM_OBJECT         (9*1024)
+
+/************************************************************/
+
+static union {
+    struct {
+        uint64_t used;    /* number of bytes from the nursery used so far */
+    };
+    char reserved[64];
+} nursery_ctl __attribute__((aligned(64)));
+
+/************************************************************/
+
+static void setup_nursery(void)
+{
+    assert(MEDIUM_OBJECT < LARGE_OBJECT);
+    assert(LARGE_OBJECT < NURSERY_SECTION_SIZE);
+    nursery_ctl.used = 0;
+}
+
+
+static stm_char *allocate_from_nursery(uint64_t bytes)
+{
+    /* thread-safe; allocate a chunk of memory from the nursery */
+    uint64_t p = __sync_fetch_and_add(&nursery_ctl.used, bytes);
+    if (p + bytes > NURSERY_SIZE) {
+        //major_collection();
+        abort();
+    }
+    return (stm_char *)(FIRST_NURSERY_PAGE * 4096UL + p);
+}
+
+
+stm_char *_stm_allocate_slowpath(ssize_t size_rounded_up)
+{
+    if (size_rounded_up < MEDIUM_OBJECT) {
+        /* This is a small object.  The current section is simply full.
+           Allocate the next section. */
+        stm_char *p = allocate_from_nursery(NURSERY_SECTION_SIZE);
+        STM_SEGMENT->nursery_current = p + size_rounded_up;
+        STM_SEGMENT->nursery_section_end = (uintptr_t)p + NURSERY_SECTION_SIZE;
+        return p;
+    }
+    abort();
+}
diff --git a/c7/stm/pages.c b/c7/stm/pages.c
--- a/c7/stm/pages.c
+++ b/c7/stm/pages.c
@@ -19,9 +19,8 @@
             abort();
         }
     }
-    for (; count > 0; count--) {
-        flag_page_private[pagenum++] = SHARED_PAGE;
-    }
+    for (i = 0; i < count; i++)
+        flag_page_private[pagenum + i] = SHARED_PAGE;
 }
 
 static void _pages_privatize(uintptr_t pagenum, uintptr_t count)
diff --git a/c7/stm/setup.c b/c7/stm/setup.c
--- a/c7/stm/setup.c
+++ b/c7/stm/setup.c
@@ -59,6 +59,9 @@
        long time for each page. */
     pages_initialize_shared(FIRST_NURSERY_PAGE, NB_NURSERY_PAGES);
 
+    setup_sync();
+    setup_nursery();
+
 #if 0
     stm_largemalloc_init(heap, HEAP_PAGES * 4096UL);
 #endif
@@ -72,6 +75,8 @@
     stm_object_pages = NULL;
 
     memset(flag_page_private, 0, sizeof(flag_page_private));
+
+    teardown_sync();
 }
 
 void stm_register_thread_local(stm_thread_local_t *tl)
@@ -85,7 +90,7 @@
         stm_thread_locals->prev->next = tl;
         stm_thread_locals->prev = tl;
     }
-    tl->associated_segment = get_segment(0);
+    tl->associated_segment_num = -1;
 }
 
 void stm_unregister_thread_local(stm_thread_local_t *tl)
@@ -99,4 +104,7 @@
     }
     tl->prev->next = tl->next;
     tl->next->prev = tl->prev;
+    tl->prev = NULL;
+    tl->next = NULL;
+    tl->associated_segment_num = -1;
 }
diff --git a/c7/stm/sync.c b/c7/stm/sync.c
new file mode 100644
--- /dev/null
+++ b/c7/stm/sync.c
@@ -0,0 +1,82 @@
+#include <semaphore.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+#include <asm/prctl.h>
+
+
+static union {
+    struct {
+        sem_t semaphore;
+        uint8_t in_use[NB_SEGMENTS];   /* 1 if running a pthread */
+    };
+    char reserved[64];
+} segments_ctl __attribute__((aligned(64)));
+
+
+static void setup_sync(void)
+{
+    memset(segments_ctl.in_use, 0, sizeof(segments_ctl.in_use));
+    if (sem_init(&segments_ctl.semaphore, 0, NB_SEGMENTS) != 0) {
+        perror("sem_init");
+        abort();
+    }
+}
+
+static void teardown_sync(void)
+{
+    if (sem_destroy(&segments_ctl.semaphore) != 0) {
+        perror("sem_destroy");
+        abort();
+    }
+}
+
+static void set_gs_register(char *value)
+{
+    if (syscall(SYS_arch_prctl, ARCH_SET_GS, (uint64_t)value) != 0) {
+        perror("syscall(arch_prctl, ARCH_SET_GS)");
+        abort();
+    }
+}
+
+static void acquire_thread_segment(stm_thread_local_t *tl)
+{
+    /* This function acquires a segment for the currently running thread,
+       and set up the GS register if it changed. */
+    while (sem_wait(&segments_ctl.semaphore) != 0) {
+        if (errno != EINTR) {
+            perror("sem_wait");
+            abort();
+        }
+    }
+    int num = tl->associated_segment_num;
+    if (num >= 0) {
+        if (__sync_lock_test_and_set(&segments_ctl.in_use[num], 1) == 0) {
+            /* fast-path: reacquired the same segment number as the one
+               we had.  The value stored in GS is still valid. */
+            goto exit;
+        }
+    }
+    /* Look for the next free segment.  There must be one, because we
+       acquired the semaphore above. */
+    while (1) {
+        num = (num + 1) % NB_SEGMENTS;
+        if (__sync_lock_test_and_set(&segments_ctl.in_use[num], 1) == 0)
+            break;
+    }
+    tl->associated_segment_num = num;
+    set_gs_register(get_segment_base(num));
+
+ exit:
+    assert(STM_SEGMENT->running_thread == NULL);
+    STM_SEGMENT->running_thread = tl;
+}
+
+static void release_thread_segment(stm_thread_local_t *tl)
+{
+    assert(STM_SEGMENT->running_thread == tl);
+    STM_SEGMENT->running_thread = NULL;
+
+    int num = tl->associated_segment_num;
+    __sync_lock_release(&segments_ctl.in_use[num]);
+    sem_post(&segments_ctl.semaphore);
+}
diff --git a/c7/stm/sync.h b/c7/stm/sync.h
new file mode 100644
--- /dev/null
+++ b/c7/stm/sync.h
@@ -0,0 +1,8 @@
+
+
+static void setup_sync(void);
+static void teardown_sync(void);
+
+/* acquire and release one of the segments for running the given thread */
+static void acquire_thread_segment(stm_thread_local_t *tl);
+static void release_thread_segment(stm_thread_local_t *tl);
diff --git a/c7/stmgc.c b/c7/stmgc.c
--- a/c7/stmgc.c
+++ b/c7/stmgc.c
@@ -2,9 +2,12 @@
 #include "stmgc.h"
 #include "stm/core.h"
 #include "stm/pages.h"
+#include "stm/sync.h"
 
 #include "stm/misc.c"
-#include "stm/core.c"
 #include "stm/pages.c"
 #include "stm/gcpage.c"
+#include "stm/nursery.c"
+#include "stm/sync.c"
 #include "stm/setup.c"
+#include "stm/core.c"
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -55,7 +55,7 @@
     /* every thread should handle the shadow stack itself */
     object_t **shadowstack, **shadowstack_base;
     /* the next fields are handled automatically by the library */
-    struct stm_segment_info_s *associated_segment;
+    int associated_segment_num;
     struct stm_thread_local_s *prev, *next;
 } stm_thread_local_t;
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to