Author: Armin Rigo <ar...@tunes.org>
Branch: c7
Changeset: r593:a7e3185f3ead
Date: 2014-01-02 10:26 +0100
http://bitbucket.org/pypy/stmgc/changeset/a7e3185f3ead/
Log:    Initial checkin of the code from
        https://bitbucket.org/arigo/arigo/raw/default/hack/stm/c7

diff --git a/c7/core.c b/c7/core.c
new file mode 100644
--- /dev/null
+++ b/c7/core.c
@@ -0,0 +1,648 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+
+#include "core.h"
+#include "list.h"
+#include "pagecopy.h"
+
+
+#define NB_PAGES            (256*256)   // 256MB
+#define NB_THREADS          2
+#define MAP_PAGES_FLAGS     (MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE)
+#define LARGE_OBJECT_WORDS  36
+
+
+typedef TLPREFIX char localchar_t;
+typedef TLPREFIX struct alloc_for_size_s alloc_for_size_t;
+typedef TLPREFIX struct _thread_local2_s _thread_local2_t;
+
+
+struct alloc_for_size_s {
+    localchar_t *next;
+    uint16_t start, stop;
+    bool flag_partial_page;
+};
+
+struct _thread_local2_s {
+    struct _thread_local1_s _tl1;
+    int thread_num;
+    char *thread_base;
+    struct stm_list_s *modified_objects;
+    struct stm_list_s *new_object_ranges;
+    struct alloc_for_size_s alloc[LARGE_OBJECT_WORDS];
+};
+#define _STM_TL2   ((_thread_local2_t *)_STM_TL1)
+
+enum { SHARED_PAGE=0, REMAPPING_PAGE, PRIVATE_PAGE };   /* flag_page_private */
+
+
+static char *object_pages;
+static char *undo_log_pages;
+static char *undo_log_current;
+static int num_threads_started, leader_thread_num;
+static uintptr_t index_page_never_used;
+static int next_write_version;
+static int undo_lock;
+static struct stm_list_s *global_history;
+static uint16_t gh_write_version_first;
+static uint16_t gh_write_version_last;
+static uint8_t flag_page_private[NB_PAGES];   /* xxx_PAGE constants above */
+
+
+/************************************************************/
+
+static void spin_loop(void)
+{
+    asm("pause" : : : "memory");
+}
+
+static void acquire_lock(int *lock)
+{
+    while (__sync_lock_test_and_set(lock, 1) != 0) {
+        while (*lock != 0)
+            spin_loop();
+    }
+}
+
+#define ACQUIRE_LOCK_IF(lock, condition)                \
+({                                                      \
+    bool _acquired = false;                             \
+    while (condition) {                                 \
+        if (__sync_lock_test_and_set(lock, 1) == 0) {   \
+            if (condition)                              \
+                _acquired = true;                       \
+            else                                        \
+                __sync_lock_release(lock);              \
+            break;                                      \
+        }                                               \
+        spin_loop();                                    \
+    }                                                   \
+    _acquired;                                          \
+})
+
+static void release_lock(int *lock)
+{
+    __sync_lock_release(lock);
+}
+
+static void write_fence(void)
+{
+#if defined(__amd64__) || defined(__i386__)
+    asm("" : : : "memory");
+#else
+# error "Define write_fence() for your architecture"
+#endif
+}
+
+static bool _stm_was_read(object_t *obj)
+{
+    read_marker_t *marker = (read_marker_t *)(((uintptr_t)obj) >> 4);
+    return (marker->rm == _STM_TL1->transaction_read_version);
+}
+
+
+static void _stm_privatize(uintptr_t pagenum)
+{
+    if (flag_page_private[pagenum] == PRIVATE_PAGE)
+        return;
+
+    if (!__sync_bool_compare_and_swap(&flag_page_private[pagenum],
+                                      SHARED_PAGE, REMAPPING_PAGE)) {
+        while (flag_page_private[pagenum] == REMAPPING_PAGE)
+            spin_loop();
+        assert(flag_page_private[pagenum] == PRIVATE_PAGE);
+        return;
+    }
+
+    ssize_t pgoff1 = pagenum;
+    ssize_t pgoff2 = pagenum + NB_PAGES;
+    ssize_t localpgoff = pgoff1 + NB_PAGES * _STM_TL2->thread_num;
+    ssize_t otherpgoff = pgoff1 + NB_PAGES * (1 - _STM_TL2->thread_num);
+
+    void *localpg = object_pages + localpgoff * 4096UL;
+    void *otherpg = object_pages + otherpgoff * 4096UL;
+
+    int res = remap_file_pages(localpg, 4096, 0, pgoff2, 0);
+    if (res < 0) {
+        perror("remap_file_pages");
+        abort();
+    }
+    pagecopy(localpg, otherpg);
+    write_fence();
+    assert(flag_page_private[pagenum] == REMAPPING_PAGE);
+    flag_page_private[pagenum] = PRIVATE_PAGE;
+}
+
+
+#define REAL_ADDRESS(object_pages, src)   ((object_pages) + (uintptr_t)(src))
+
+static char *real_address(uintptr_t src)
+{
+    return REAL_ADDRESS(_STM_TL2->thread_base, src);
+}
+
+static char *get_thread_base(long thread_num)
+{
+    return object_pages + thread_num * (NB_PAGES * 4096UL);
+}
+
+void stm_abort_transaction(void);
+
+enum detect_conflicts_e { CANNOT_CONFLICT, CAN_CONFLICT };
+
+/* XXX this can be done by acquiring the undo_lock for much less time,
+   but it needs to be carefully synchronized with _stm_write_slowpath().
+   For now it must be called with the undo_lock acquired. */
+static void update_to_current_version(enum detect_conflicts_e check_conflict)
+{
+    /* Loop over objects in 'global_history': if they have been
+       read by the current transaction, the current transaction must
+       abort; then copy them out of the leader's object space ---
+       which may have been modified by the leader's uncommitted
+       transaction; this case will be fixed afterwards.
+    */
+    bool conflict_found_or_dont_check = (check_conflict == CANNOT_CONFLICT);
+    char *local_base = _STM_TL2->thread_base;
+    char *remote_base = get_thread_base(1 - _STM_TL2->thread_num);
+    struct stm_list_s *gh, *gh_next;
+
+    assert(leader_thread_num != _STM_TL2->thread_num);
+
+    for (gh = global_history; gh != NULL; gh = gh_next) {
+
+        STM_LIST_FOREACH(gh, ({
+
+            if (!conflict_found_or_dont_check)
+                conflict_found_or_dont_check = _stm_was_read(item);
+
+            char *dst = REAL_ADDRESS(local_base, item);
+            char *src = REAL_ADDRESS(remote_base, item);
+            char *src_rebased = src - (uintptr_t)local_base;
+            size_t size = stm_object_size_rounded_up((object_t *)src_rebased);
+
+            memcpy(dst + sizeof(char *),
+                   src + sizeof(char *),
+                   size - sizeof(char *));
+        }));
+
+        gh_next = gh->nextlist;
+        stm_list_free(gh);
+    }
+    global_history = NULL;
+    gh_write_version_first = 0xffff;
+    gh_write_version_last = 0;
+
+    /* Finally, loop over objects modified by the leader,
+       and copy them out of the undo log.
+    */
+    char *undo = undo_log_pages;
+    char *undo_end = undo_log_current;
+
+    while (undo < undo_end) {
+
+        char *src = undo;
+        char *dst = REAL_ADDRESS(local_base, *(object_t **)src);
+        char *src_rebased = src - (uintptr_t)local_base;
+
+        *(char **)src = *(char **)dst;   /* fix the first word of the
+                                            object in the undo log, for
+                                            stm_object_size_rounded_up() */
+        size_t size = stm_object_size_rounded_up((object_t *)src_rebased);
+
+        memcpy(dst + sizeof(char *),
+               src + sizeof(char *),
+               size - sizeof(char *));
+
+        undo += size;
+    }
+    undo_log_current = undo_log_pages;   /* make empty again */
+
+    if (conflict_found_or_dont_check && check_conflict == CAN_CONFLICT) {
+        release_lock(&undo_lock);
+        stm_abort_transaction();
+    }
+}
+
+static void maybe_update(enum detect_conflicts_e check_conflict)
+{
+    if (leader_thread_num != _STM_TL2->thread_num && global_history != NULL) {
+        acquire_lock(&undo_lock);
+        update_to_current_version(check_conflict);
+        release_lock(&undo_lock);
+    }
+}
+
+
+void _stm_write_slowpath(object_t *obj)
+{
+    maybe_update(CAN_CONFLICT);
+
+    _stm_privatize(((uintptr_t)obj) / 4096);
+
+    stm_read(obj);
+
+    _STM_TL2->modified_objects = stm_list_append(_STM_TL2->modified_objects, obj);
+
+    uint16_t wv = obj->write_version;
+    obj->write_version = _STM_TL1->transaction_write_version;
+
+    /* We only need to store a copy of the current version of the object if:
+       - we are the leader;
+       - the object is present in the global_history.
+
+       The second condition is approximated by the following range
+       check.  Storing a few more objects than strictly needed is not
+       really a problem.
+    */
+    /* XXX this can be done without acquiring the undo_lock at all,
+       but we need more care in update_to_current_version(). */
+
+    /* XXX can we avoid writing an unbounded number of copies of the
+       same object in case we run a lot of transactions while the other
+       thread is busy?  An unlikely case, but annoying in theory.
+       Should we bound the undo log's size to much less than NB_PAGES
+       anyway, and sleep here if it is full?  Should that bound also
+       count the size taken by the global_history lists? */
+    if (ACQUIRE_LOCK_IF(&undo_lock,
+                        wv <= gh_write_version_last
+                        && wv >= gh_write_version_first
+                        && leader_thread_num == _STM_TL2->thread_num)) {
+        /* record in the undo log a copy of the content of the object */
+        size_t size = stm_object_size_rounded_up(obj);
+        char *source = real_address((uintptr_t)obj);
+        char *undo = undo_log_current;
+        *((object_t **)undo) = obj;
+        memcpy(undo + sizeof(object_t *),
+               source + sizeof(object_t *),
+               size - sizeof(object_t *));
+        /*write_fence();*/
+        undo_log_current = undo + size;
+        release_lock(&undo_lock);
+    }
+}
+
+
+uintptr_t _stm_reserve_page(void)
+{
+    /* Grab a free page, initially shared between the threads. */
+
+    // XXX look in some free list first
+
+    /* Return the index'th object page, which is so far never used. */
+    uintptr_t index = __sync_fetch_and_add(&index_page_never_used, 1);
+    if (index >= NB_PAGES) {
+        fprintf(stderr, "Out of mmap'ed memory!\n");
+        abort();
+    }
+    return index;
+}
+
+#define TO_RANGE(range, start, stop)                                    \
+    ((range) = (object_t *)((start) | (((uintptr_t)(stop)) << 16)))
+
+#define FROM_RANGE(start, stop, range)          \
+    ((start) = (uint16_t)(uintptr_t)(range),    \
+     (stop) = ((uintptr_t)(range)) >> 16)
+
+localchar_t *_stm_alloc_next_page(size_t i)
+{
+    /* 'alloc->next' points to where the next allocation should go.  The
+       present function is called instead when this next allocation is
+       equal to 'alloc->stop'.  As we know that 'start', 'next' and
+       'stop' are always nearby pointers, we play tricks and only store
+       the lower 16 bits of 'start' and 'stop', so that the three
+       variables plus some flags fit in 16 bytes.
+
+       'flag_partial_page' is *cleared* to mean that the 'alloc'
+       describes a complete page, so that it need not be listed inside
+       'new_object_ranges'.  In all other cases it is *set*.
+    */
+    uintptr_t page;
+    localchar_t *result;
+    alloc_for_size_t *alloc = &_STM_TL2->alloc[i];
+    size_t size = i * 8;
+
+    if (alloc->flag_partial_page) {
+        /* record this range in 'new_object_ranges' */
+        localchar_t *ptr1 = alloc->next - size - 1;
+        object_t *range;
+        TO_RANGE(range, alloc->start, alloc->stop);
+        page = ((uintptr_t)ptr1) / 4096;
+        _STM_TL2->new_object_ranges = stm_list_append(
+            _STM_TL2->new_object_ranges, (object_t *)page);
+        _STM_TL2->new_object_ranges = stm_list_append(
+            _STM_TL2->new_object_ranges, range);
+    }
+
+    /* reserve a fresh new page */
+    page = _stm_reserve_page();
+
+    result = (localchar_t *)(page * 4096UL);
+    alloc->start = (uintptr_t)result;
+    alloc->stop = alloc->start + (4096 / size) * size;
+    alloc->next = result + size;
+    alloc->flag_partial_page = false;
+    return result;
+}
+
+object_t *stm_allocate(size_t size)
+{
+    assert(size % 8 == 0);
+    size_t i = size / 8;
+    assert(2 <= i && i < LARGE_OBJECT_WORDS);   //XXX
+    alloc_for_size_t *alloc = &_STM_TL2->alloc[i];
+
+    localchar_t *p = alloc->next;
+    alloc->next = p + size;
+    if ((uint16_t)(uintptr_t)p == alloc->stop)
+        p = _stm_alloc_next_page(i);
+
+    object_t *result = (object_t *)p;
+    result->write_version = _STM_TL1->transaction_write_version;
+    return result;
+}
+
+
+#define TOTAL_MEMORY          (NB_PAGES * 4096UL * (NB_THREADS + 1))
+#define READMARKER_END        ((NB_PAGES * 4096UL) >> 4)
+#define FIRST_OBJECT_PAGE     ((READMARKER_END + 4095) / 4096UL)
+#define READMARKER_START      ((FIRST_OBJECT_PAGE * 4096UL) >> 4)
+#define FIRST_READMARKER_PAGE (READMARKER_START / 4096UL)
+
+void stm_setup(void)
+{
+    /* Check that some values are acceptable */
+    assert(4096 <= ((uintptr_t)_STM_TL1));
+    assert(((uintptr_t)_STM_TL1) == ((uintptr_t)_STM_TL2));
+    assert(((uintptr_t)_STM_TL2) + sizeof(*_STM_TL2) <= 8192);
+    assert(2 <= FIRST_READMARKER_PAGE);
+    assert(FIRST_READMARKER_PAGE * 4096UL <= READMARKER_START);
+    assert(READMARKER_START < READMARKER_END);
+    assert(READMARKER_END <= 4096UL * FIRST_OBJECT_PAGE);
+    assert(FIRST_OBJECT_PAGE < NB_PAGES);
+
+    object_pages = mmap(NULL, TOTAL_MEMORY,
+                        PROT_READ | PROT_WRITE,
+                        MAP_PAGES_FLAGS, -1, 0);
+    if (object_pages == MAP_FAILED) {
+        perror("object_pages mmap");
+        abort();
+    }
+
+    long i;
+    for (i = 0; i < NB_THREADS; i++) {
+        char *thread_base = get_thread_base(i);
+
+        /* In each thread's section, the first page is where TLPREFIX'ed
+           NULL accesses land.  We mprotect it so that accesses fail.
+        */
+        mprotect(thread_base, 4096, PROT_NONE);
+
+        /* Fill the TLS page (page 1) with 0xDD */
+        memset(REAL_ADDRESS(thread_base, 4096), 0xDD, 4096);
+        /* Make a "hole" at _STM_TL1 / _STM_TL2 */
+        memset(REAL_ADDRESS(thread_base, _STM_TL2), 0, sizeof(*_STM_TL2));
+
+        _STM_TL2->thread_num = i;
+        _STM_TL2->thread_base = thread_base;
+
+        if (i > 0) {
+            int res;
+            res = remap_file_pages(thread_base + FIRST_OBJECT_PAGE * 4096UL,
+                                   (NB_PAGES - FIRST_OBJECT_PAGE) * 4096UL,
+                                   0, FIRST_OBJECT_PAGE, 0);
+            if (res != 0) {
+                perror("remap_file_pages");
+                abort();
+            }
+        }
+    }
+
+    undo_log_pages = get_thread_base(NB_THREADS);
+    mprotect(undo_log_pages, 4096, PROT_NONE);
+    mprotect(undo_log_pages + (NB_PAGES - 1) * 4096UL, 4096, PROT_NONE);
+    undo_log_pages += 4096;
+    undo_log_current = undo_log_pages;
+
+    num_threads_started = 0;
+    index_page_never_used = FIRST_OBJECT_PAGE;
+    next_write_version = 1;
+    leader_thread_num = 0;
+    global_history = NULL;
+    gh_write_version_first = 0xffff;
+    gh_write_version_last = 0;
+}
+
+#define INVALID_GS_VALUE  0xDDDDDDDDDDDDDDDDUL
+
+static void set_gs_register(uint64_t value)
+{
+    int result = syscall(SYS_arch_prctl, ARCH_SET_GS, value);
+    assert(result == 0);
+}
+
+void stm_setup_thread(void)
+{
+    int thread_num = __sync_fetch_and_add(&num_threads_started, 1);
+    assert(thread_num < 2);   /* only 2 threads for now */
+
+    char *thread_base = get_thread_base(thread_num);
+    set_gs_register((uintptr_t)thread_base);
+
+    assert(_STM_TL2->thread_num == thread_num);
+    assert(_STM_TL2->thread_base == thread_base);
+
+    _STM_TL2->modified_objects = stm_list_create();
+}
+
+void _stm_teardown_thread(void)
+{
+    stm_list_free(_STM_TL2->modified_objects);
+    _STM_TL2->modified_objects = NULL;
+
+    set_gs_register(INVALID_GS_VALUE);
+}
+
+void _stm_teardown(void)
+{
+    munmap(object_pages, TOTAL_MEMORY);
+    object_pages = NULL;
+    undo_log_pages = NULL;
+    undo_log_current = NULL;
+}
+
+
+static void reset_transaction_read_version(void)
+{
+    /* force-reset all read markers to 0 */
+    int res = madvise(real_address(FIRST_READMARKER_PAGE * 4096UL),
+                      (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE) * 4096UL,
+                      MADV_DONTNEED);
+    if (res < 0) {
+        perror("madvise");
+        abort();
+    }
+    _STM_TL1->transaction_read_version = 0;
+}
+
+void stm_major_collection(void)
+{
+    abort();
+}
+
+void stm_start_transaction(jmp_buf *jmpbufptr)
+{
+    if (_STM_TL1->transaction_read_version == 0xff)
+        reset_transaction_read_version();
+    _STM_TL1->transaction_read_version++;
+    _STM_TL1->jmpbufptr = NULL;
+
+    while (1) {
+        int wv = __sync_fetch_and_add(&next_write_version, 1);
+        if (LIKELY(wv <= 0xffff)) {
+            _STM_TL1->transaction_write_version = wv;
+            break;
+        }
+        /* We ran out of 16-bit numbers before the next major
+           collection, which resets them.  XXX This case seems unlikely
+           for now, but check if it could become a bottleneck at some
+           point.
+        */
+        stm_major_collection();
+    }
+    assert(stm_list_is_empty(_STM_TL2->modified_objects));
+    assert(stm_list_is_empty(_STM_TL2->new_object_ranges));
+
+    maybe_update(CANNOT_CONFLICT);   /* no object read yet: cannot conflict */
+
+    _STM_TL1->jmpbufptr = jmpbufptr;
+}
+
+static void update_new_objects_in_other_threads(uintptr_t pagenum,
+                                                uint16_t start, uint16_t stop)
+{
+    size_t size = (uint16_t)(stop - start);
+    assert(size <= 4096 - (start & 4095));
+    assert((start & ~4095) == (uint16_t)(pagenum * 4096));
+
+    int thread_num = _STM_TL2->thread_num;
+    uintptr_t local_src = (pagenum * 4096UL) + (start & 4095);
+    char *dst = REAL_ADDRESS(get_thread_base(1 - thread_num), local_src);
+    char *src = REAL_ADDRESS(_STM_TL2->thread_base, local_src);
+
+    memcpy(dst, src, size);
+}
+
+void stm_stop_transaction(void)
+{
+    write_fence();   /* see later in this function for why */
+
+    acquire_lock(&undo_lock);
+
+    if (leader_thread_num != _STM_TL2->thread_num) {
+        /* non-leader thread */
+        if (global_history != NULL) {
+            update_to_current_version(CAN_CONFLICT);
+            assert(global_history == NULL);
+        }
+
+        /* steal leadership now */
+        leader_thread_num = _STM_TL2->thread_num;
+    }
+
+    /* now we are the leader thread.  the leader can always commit */
+    _STM_TL1->jmpbufptr = NULL;          /* cannot abort any more */
+    undo_log_current = undo_log_pages;   /* throw away the content */
+
+    /* add these objects to the global_history */
+    _STM_TL2->modified_objects->nextlist = global_history;
+    global_history = _STM_TL2->modified_objects;
+    _STM_TL2->modified_objects = stm_list_create();
+
+    uint16_t wv = _STM_TL1->transaction_write_version;
+    if (wv < gh_write_version_first) gh_write_version_first = wv;
+    if (wv > gh_write_version_last)  gh_write_version_last = wv;
+
+    /* walk the new_object_ranges and manually copy the new objects
+       to the other thread's pages in the (hopefully rare) case that
+       the page they belong to is already unshared */
+    long i;
+    struct stm_list_s *lst = _STM_TL2->new_object_ranges;
+    for (i = stm_list_count(lst); i > 0; ) {
+        i -= 2;
+        uintptr_t pagenum = (uintptr_t)stm_list_item(lst, i);
+
+        /* NB. the read in the next line should work even against a
+           parallel thread, thanks to the lock acquisition we do
+           earlier (see the beginning of this function).  Indeed, if
+           this read returns SHARED_PAGE, then we know that the real
+           value in memory was actually SHARED_PAGE at least at the
+           time of the acquire_lock().  It may have been modified
+           afterwards by a compare_and_swap() in the other thread, but
+           then we know for sure that the other thread is seeing the
+           last, up-to-date version of our data --- this is the reason
+           for the write_fence() just before the acquire_lock().
+        */
+        if (flag_page_private[pagenum] != SHARED_PAGE) {
+            object_t *range = stm_list_item(lst, i + 1);
+            uint16_t start, stop;
+            FROM_RANGE(start, stop, range);
+            update_new_objects_in_other_threads(pagenum, start, stop);
+        }
+    }
+
+    /* do the same for the partially-allocated pages */
+    long j;
+    for (j = 2; j < LARGE_OBJECT_WORDS; j++) {
+        alloc_for_size_t *alloc = &_STM_TL2->alloc[j];
+        uint16_t start = alloc->start;
+        uint16_t cur = (uintptr_t)alloc->next;
+
+        if (start == cur) {
+            /* nothing to do: this assigned page was left empty by the
+               previous transaction, and also starts empty in the new
+               transaction.  'flag_partial_page' is unchanged.
+            */
+        }
+        else {
+            uintptr_t pagenum = ((uintptr_t)(alloc->next - 1)) / 4096UL;
+            /* for the new transaction, it will start here: */
+            alloc->start = cur;
+
+            if (alloc->flag_partial_page) {
+                if (flag_page_private[pagenum] != SHARED_PAGE) {
+                    update_new_objects_in_other_threads(pagenum, start, cur);
+                }
+            }
+            else {
+                /* we can skip checking flag_page_private[] here because
+                   the whole page can only contain objects made by the
+                   just-finished transaction. */
+                assert(flag_page_private[pagenum] == SHARED_PAGE);
+
+                /* the next transaction will start with this page
+                   containing objects that are now committed, so
+                   we need to set this flag now */
+                alloc->flag_partial_page = true;
+            }
+        }
+    }
+
+    release_lock(&undo_lock);
+}
+
+void stm_abort_transaction(void)
+{
+    long j;
+    for (j = 2; j < LARGE_OBJECT_WORDS; j++) {
+        alloc_for_size_t *alloc = &_STM_TL2->alloc[j];
+        uint16_t num_allocated = ((uintptr_t)alloc->next) - alloc->start;
+        alloc->next -= num_allocated;
+    }
+    stm_list_clear(_STM_TL2->new_object_ranges);
+    stm_list_clear(_STM_TL2->modified_objects);
+    assert(_STM_TL1->jmpbufptr != NULL);
+    assert(_STM_TL1->jmpbufptr != (jmp_buf *)-1);   /* for tests only */
+    longjmp(*_STM_TL1->jmpbufptr, 1);
+}
diff --git a/c7/core.h b/c7/core.h
new file mode 100644
--- /dev/null
+++ b/c7/core.h
@@ -0,0 +1,58 @@
+#ifndef _STM_CORE_H
+#define _STM_CORE_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+
+#define TLPREFIX __attribute__((address_space(256)))
+
+typedef TLPREFIX struct _thread_local1_s _thread_local1_t;
+typedef TLPREFIX struct object_s object_t;
+typedef TLPREFIX struct read_marker_s read_marker_t;
+
+
+struct object_s {
+    uint16_t write_version;
+    /*uint8_t stm_flags;*/
+};
+
+struct read_marker_s {
+    uint8_t rm;
+};
+
+struct _thread_local1_s {
+    jmp_buf *jmpbufptr;
+    uint8_t transaction_read_version;
+    uint16_t transaction_write_version;
+};
+#define _STM_TL1   ((_thread_local1_t *)4352)
+
+
+/* this should use llvm's coldcc calling convention,
+   but it's not exposed to C code so far */
+void _stm_write_slowpath(object_t *);
+
+#define LIKELY(x)   __builtin_expect(x, true)
+#define UNLIKELY(x) __builtin_expect(x, false)
+
+
+static inline void stm_read(object_t *obj)
+{
+    ((read_marker_t *)(((uintptr_t)obj) >> 4))->rm =
+        _STM_TL1->transaction_read_version;
+}
+
+static inline void stm_write(object_t *obj)
+{
+    if (UNLIKELY(obj->write_version != _STM_TL1->transaction_write_version))
+        _stm_write_slowpath(obj);
+}
+
+
+/* must be provided by the user of this library */
+extern size_t stm_object_size_rounded_up(object_t *);
+
+
+#endif
diff --git a/c7/list.c b/c7/list.c
new file mode 100644
--- /dev/null
+++ b/c7/list.c
@@ -0,0 +1,39 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "list.h"
+
+
+#define SETSIZE(n)    (sizeof(struct stm_list_s) + ITEMSSIZE(n))
+#define ITEMSSIZE(n)  ((n) * sizeof(object_t*))
+#define OVERCNT(n)    (33 + ((((n) / 2) * 3) | 1))
+
+struct stm_list_s *stm_list_create(void)
+{
+    uintptr_t initial_allocation = 32;
+    struct stm_list_s *lst = malloc(SETSIZE(initial_allocation));
+    if (lst == NULL) {
+        perror("out of memory in stm_list_create");
+        abort();
+    }
+    lst->count = 0;
+    lst->last_allocated = initial_allocation - 1;
+    assert(lst->last_allocated & 1);
+    return lst;
+}
+
+struct stm_list_s *_stm_list_grow(struct stm_list_s *lst, uintptr_t nalloc)
+{
+    assert(lst->last_allocated & 1);
+    nalloc = OVERCNT(nalloc);
+    lst = realloc(lst, SETSIZE(nalloc));
+    if (lst == NULL) {
+        perror("out of memory in _stm_list_grow");
+        abort();
+    }
+    lst->last_allocated = nalloc - 1;
+    assert(lst->last_allocated & 1);
+    return lst;
+}
diff --git a/c7/list.h b/c7/list.h
new file mode 100644
--- /dev/null
+++ b/c7/list.h
@@ -0,0 +1,67 @@
+#ifndef _STM_LIST_H
+#define _STM_LIST_H
+
+#include "core.h"
+
+
+struct stm_list_s {
+    uintptr_t count;
+    union {
+        uintptr_t last_allocated;      /* always odd */
+        struct stm_list_s *nextlist;   /* always even */
+    };
+    object_t *items[];
+};
+
+struct stm_list_s *stm_list_create(void);
+
+static inline void stm_list_free(struct stm_list_s *lst)
+{
+    free(lst);
+}
+
+
+struct stm_list_s *_stm_list_grow(struct stm_list_s *, uintptr_t);
+
+static inline struct stm_list_s *
+stm_list_append(struct stm_list_s *lst, object_t *item)
+{
+    uintptr_t index = lst->count++;
+    if (UNLIKELY(index > lst->last_allocated))
+        lst = _stm_list_grow(lst, index);
+    lst->items[index] = item;
+    return lst;
+}
+
+static inline void stm_list_clear(struct stm_list_s *lst)
+{
+    lst->count = 0;
+}
+
+static inline bool stm_list_is_empty(struct stm_list_s *lst)
+{
+    return (lst->count == 0);
+}
+
+static inline uintptr_t stm_list_count(struct stm_list_s *lst)
+{
+    return lst->count;
+}
+
+static inline object_t *stm_list_item(struct stm_list_s *lst, uintptr_t index)
+{
+    return lst->items[index];
+}
+
+#define STM_LIST_FOREACH(lst, CODE)             \
+    do {                                        \
+        struct stm_list_s *_lst = (lst);        \
+        uintptr_t _i;                           \
+        for (_i = _lst->count; _i--; ) {        \
+            object_t *item = _lst->items[_i];   \
+            CODE;                               \
+        }                                       \
+    } while (0)
+
+
+#endif
diff --git a/c7/pagecopy.c b/c7/pagecopy.c
new file mode 100644
--- /dev/null
+++ b/c7/pagecopy.c
@@ -0,0 +1,57 @@
+
+void pagecopy(void *dest, const void *src)
+{
+    unsigned long i;
+    for (i = 0; i < 4096 / 128; i++) {
+        asm volatile("movdqa (%0), %%xmm0\n"
+                     "movdqa 16(%0), %%xmm1\n"
+                     "movdqa 32(%0), %%xmm2\n"
+                     "movdqa 48(%0), %%xmm3\n"
+                     "movdqa %%xmm0, (%1)\n"
+                     "movdqa %%xmm1, 16(%1)\n"
+                     "movdqa %%xmm2, 32(%1)\n"
+                     "movdqa %%xmm3, 48(%1)\n"
+                     "movdqa 64(%0), %%xmm0\n"
+                     "movdqa 80(%0), %%xmm1\n"
+                     "movdqa 96(%0), %%xmm2\n"
+                     "movdqa 112(%0), %%xmm3\n"
+                     "movdqa %%xmm0, 64(%1)\n"
+                     "movdqa %%xmm1, 80(%1)\n"
+                     "movdqa %%xmm2, 96(%1)\n"
+                     "movdqa %%xmm3, 112(%1)\n"
+                     :
+                     : "r"(src + 128*i), "r"(dest + 128*i)
+                     : "xmm0", "xmm1", "xmm2", "xmm3", "memory");
+    }
+}
+
+#if 0   /* XXX enable if detected on the cpu */
+void pagecopy_ymm8(void *dest, const void *src)
+{
+    asm volatile("0:\n"
+                 "vmovdqa (%0), %%ymm0\n"
+                 "vmovdqa 32(%0), %%ymm1\n"
+                 "vmovdqa 64(%0), %%ymm2\n"
+                 "vmovdqa 96(%0), %%ymm3\n"
+                 "vmovdqa 128(%0), %%ymm4\n"
+                 "vmovdqa 160(%0), %%ymm5\n"
+                 "vmovdqa 192(%0), %%ymm6\n"
+                 "vmovdqa 224(%0), %%ymm7\n"
+                 "addq $256, %0\n"
+                 "vmovdqa %%ymm0, (%1)\n"
+                 "vmovdqa %%ymm1, 32(%1)\n"
+                 "vmovdqa %%ymm2, 64(%1)\n"
+                 "vmovdqa %%ymm3, 96(%1)\n"
+                 "vmovdqa %%ymm4, 128(%1)\n"
+                 "vmovdqa %%ymm5, 160(%1)\n"
+                 "vmovdqa %%ymm6, 192(%1)\n"
+                 "vmovdqa %%ymm7, 224(%1)\n"
+                 "addq $256, %1\n"
+                 "cmpq %2, %0\n"
+                 "jne 0b"
+                 : "=r"(src), "=r"(dest)
+                 : "r"((char *)src + 4096), "0"(src), "1"(dest)
+                 : "xmm0", "xmm1", "xmm2", "xmm3",
+                   "xmm4", "xmm5", "xmm6", "xmm7");
+}
+#endif
diff --git a/c7/pagecopy.h b/c7/pagecopy.h
new file mode 100644
--- /dev/null
+++ b/c7/pagecopy.h
@@ -0,0 +1,2 @@
+
+void pagecopy(void *dest, const void *src);
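
To make the intended call pattern concrete, here is a minimal usage sketch; it is not part of the changeset. The transaction entry points are defined in core.c but not yet declared in core.h, so the sketch declares them itself; my_thread() and the fixed 16-byte object size are hypothetical stand-ins, while stm_object_size_rounded_up() is the callback that core.h requires the user of the library to provide. It assumes stm_setup() has already been called once from the main thread, and a compiler (e.g. clang) that supports __attribute__((address_space(256))):

    #include <stddef.h>
    #include <setjmp.h>
    #include "core.h"

    /* entry points defined in core.c; core.h does not export them yet */
    void stm_setup(void);
    void stm_setup_thread(void);
    void stm_start_transaction(jmp_buf *jmpbufptr);
    void stm_stop_transaction(void);
    object_t *stm_allocate(size_t size);

    /* the callback required by core.h; every object is 16 bytes in this
       toy example (sizes must be multiples of 8, as stm_allocate()
       asserts) */
    size_t stm_object_size_rounded_up(object_t *obj)
    {
        return 16;
    }

    /* body of one of the (at most two) threads */
    void my_thread(void)
    {
        stm_setup_thread();            /* points %gs at this thread's segment */

        jmp_buf here;
        setjmp(here);                  /* an abort longjmp()s back here... */
        stm_start_transaction(&here);  /* ...and the transaction restarts */

        object_t *obj = stm_allocate(16);
        stm_write(obj);                /* mark as written before mutating */
        /* ... mutate obj's fields through its TLPREFIX pointer ... */

        stm_stop_transaction();        /* commit, stealing leadership if
                                          needed; may longjmp() to 'here'
                                          instead on a read-write conflict */
    }

Note that the retry loop is implicit: stm_abort_transaction() longjmp()s to the jmp_buf registered with stm_start_transaction(), so control falls through the setjmp() again and the transaction body re-executes from the top.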