Author: Armin Rigo <[email protected]>
Branch: c5
Changeset: r583:89a559713dd1
Date: 2013-12-23 11:01 +0100
http://bitbucket.org/pypy/stmgc/changeset/89a559713dd1/
Log: in-progress
diff --git a/c5/Makefile b/c5/Makefile
--- a/c5/Makefile
+++ b/c5/Makefile
@@ -2,12 +2,14 @@
H_FILES = core.h pagecopy.h
C_FILES = core.c pagecopy.c
+CLANG = clang -Wall -ferror-limit=3 -fno-color-diagnostics
+
demo1: demo1.c $(C_FILES) $(H_FILES)
- gcc -pthread -o $@ -O2 -g demo1.c $(C_FILES) -Wall
+ $(CLANG) -pthread -o $@ -O2 -g demo1.c $(C_FILES)
demo2: demo2.c largemalloc.c largemalloc.h
- gcc -o $@ -g demo2.c largemalloc.c -Wall
+ $(CLANG) -o $@ -g demo2.c largemalloc.c
clean:
rm -f demo1 demo2
diff --git a/c5/core.c b/c5/core.c
--- a/c5/core.c
+++ b/c5/core.c
@@ -1,7 +1,11 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
+#include <unistd.h>
#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
#include <errno.h>
#include <assert.h>
#include <string.h>
@@ -12,7 +16,7 @@
/* This only works with clang, and on 64-bit Linux, for now.
It depends on:
-
+
* the %gs segment prefix
    This is a hack using __attribute__((address_space(256))) on
@@ -106,7 +110,7 @@
*/
#define NB_PAGES (256*1024) // 1GB
-#define NB_THREADS 128
+#define NB_THREADS 16
#define MAP_PAGES_FLAGS (MAP_SHARED|MAP_ANONYMOUS|MAP_NORESERVE)
#define CACHE_LINE_SIZE 128 // conservatively large value to avoid aliasing
@@ -115,34 +119,33 @@
struct page_header_s {
/* Every page starts with one such structure */
- uint16_t version; /* when the data in the page was written */
uint8_t obj_word_size; /* size of all objects in this page, in words
in range(2, LARGE_OBJECT_WORDS) */
- uint32_t write_log_index;
+ _Bool thread_local_copy;
+ uint32_t write_log_index_cache;
};
-struct write_log_s {
- uint32_t pgoff;
- uint32_t modif[8]; /* N'th bit set if and only if object at N*16 changed
*/
-};
+struct write_entry_s {
+ uint32_t pgoff; /* the pgoff of the page that was modified */
+ uint64_t bitmask[4]; /* bit N is set if object at 'N*16' was modified */
+} __attribute__((packed));
struct write_history_s {
struct write_history_s *previous_older_transaction;
uint16_t transaction_version;
- struct write_log_s log[]; /* ends with pgoff == 0 */
+ uint32_t nb_updates;
+ struct write_entry_s updates[];
};
struct shared_descriptor_s {
/* There is a single shared descriptor. This contains global
- variables, but as a structure, in order to control the sharing at
- the cache line level --- we don't want the following few
+ variables, but as a structure, in order to control the aliasing
+ at the cache line level --- we don't want the following few
variables to be accidentally in the same cache line. */
- char _pad0[CACHE_LINE_SIZE]; uint64_t volatile index_page_never_used;
- char _pad1[CACHE_LINE_SIZE]; unsigned int volatile
next_transaction_version;
- /* always EVEN */
- char _pad2[CACHE_LINE_SIZE]; struct write_history_s *
- volatile
most_recent_committed_transaction;
- char _pad3[CACHE_LINE_SIZE];
+ char pad0[CACHE_LINE_SIZE]; uint64_t volatile index_page_never_used;
+ char pad2[CACHE_LINE_SIZE]; struct write_history_s *
+ volatile most_recent_committed_transaction;
+ char pad3[CACHE_LINE_SIZE];
};
struct alloc_for_size_s {
@@ -154,139 +157,108 @@
/* All the thread-local variables we need. */
struct write_history_s *base_page_mapping;
struct write_history_s *writes_by_this_transaction;
+ uint32_t nb_updates_max;
struct alloc_for_size_s alloc[LARGE_OBJECT_WORDS];
- char *read_markers;
+ uint64_t gs_value;
_thread_local1_t _stm_tl1; /* space for the macro _STM_TL1 in core.h */
} _thread_local2_t;
#define _STM_TL2 (((_thread_local2_t *)0)[-1])
+char *stm_object_pages;
struct shared_descriptor_s stm_shared_descriptor;
+volatile int stm_next_thread_index;
/************************************************************/
-_Bool _stm_was_read(struct object_s *object)
+_Bool _stm_was_read(object_t *object)
{
- return (stm_current_read_markers[((uintptr_t)object) >> 4].c ==
- (unsigned char)(uintptr_t)stm_current_read_markers);
+ return _STM_CRM[((uintptr_t)object) >> 4].c == _STM_TL1.read_marker;
}
-static struct _read_marker_s *get_current_read_marker(struct object_s *object)
+_Bool _stm_was_written(object_t *object)
{
- struct _read_marker_s *crm = _STM_TL1.stm_current_read_markers;
- return crm + (((uintptr_t)object) >> 4);
-}
-
-_Bool _stm_was_written(struct object_s *object)
-{
- uint16_t stv = _STM_TL1.stm_transaction_version;
- return (object->modified == stv);
+ return (object->flags & GCFLAG_WRITE_BARRIER) == 0;
}
struct page_header_s *_stm_reserve_page(void)
{
/* Grab a free mm page, and map it into the address space.
- Return a pointer to it. It has kind == PGKIND_FREED. */
+ Return a pointer to it. */
// XXX look in some free list first
- /* Return the index'th mm page, which is so far NEVER_USED. It
- should never have been accessed so far, and be already mapped
- as the index'th local page. */
- struct shared_descriptor_s *d = stm_shared_descriptor;
- uint64_t index = __sync_fetch_and_add(&d->index_page_never_used, 1);
+ /* Return the index'th object page, which is so far never used. */
+ uint64_t index = __sync_fetch_and_add(
+ &stm_shared_descriptor.index_page_never_used, 1);
if (index >= NB_PAGES) {
fprintf(stderr, "Out of mmap'ed memory!\n");
abort();
}
- struct page_header_s *result = (struct page_header_s *)
- (((char *)stm_shared_descriptor) + index * 4096);
- assert(result->kind == PGKIND_NEVER_USED);
- result->kind = PGKIND_FREED;
- result->pgoff = index;
- return result;
+ return (struct page_header_s *)(stm_object_pages + index * 4096UL);
}
-static struct write_history_s *_reserve_page_write_history(void)
+
+static struct page_header_s *
+fetch_thread_local_page(struct page_header_s *page)
{
- struct page_header_s *newpage = _stm_reserve_page();
- newpage->kind = PGKIND_WRITE_HISTORY;
- return (struct write_history_s *)(newpage + 1);
+ struct page_header_s *mypage = (struct page_header_s *)
+ (((char *)page) + _STM_TL2.gs_value);
+
+ if (!mypage->thread_local_copy) {
+ /* make a thread-local copy of that page, by remapping the page
+ back to its underlying page and manually copying the data. */
+ uint64_t fileofs = ((char *)mypage) - stm_object_pages;
+
+ if (remap_file_pages((void *)mypage, 4096, 0, fileofs / 4096,
+ MAP_PAGES_FLAGS) < 0) {
+ perror("remap_file_pages in write_barrier");
+ abort();
+ }
+ pagecopy(mypage, page);
+ mypage->thread_local_copy = 1;
+ }
+ return mypage;
}
-
-static uint32_t get_pgoff(struct page_header_s *page)
-{
- assert(page->pgoff > 0);
- assert(page->pgoff < NB_PAGES);
- return page->pgoff;
-}
-
-static uint32_t get_local_index(struct page_header_s *page)
-{
- uint64_t index = ((char *)page) - (char *)stm_shared_descriptor;
- assert((index & 4095) == 0);
- index /= 4096;
- assert(0 < index && index < NB_PAGES);
- return index;
-}
-
-static struct page_header_s *get_page_by_local_index(uint32_t index)
-{
- assert(0 < index && index < NB_PAGES);
- uint64_t ofs = ((uint64_t)index) * 4096;
- return (struct page_header_s *)(((char *)stm_shared_descriptor) + ofs);
-}
-
-void _stm_write_slowpath(struct object_s * object)
+void _stm_write_barrier_slowpath(object_t *object)
{
stm_read(object);
struct page_header_s *page;
page = (struct page_header_s *)(((uintptr_t)object) & ~4095);
- assert(2 <= page->kind && page->kind < LARGE_OBJECT_WORDS);
+ assert(2 <= page->obj_word_size);
+ assert(page->obj_word_size < LARGE_OBJECT_WORDS);
- if (page->version != stm_transaction_version) {
- struct page_header_s *newpage = _stm_reserve_page();
- uint32_t old_pgoff = get_pgoff(page);
- uint32_t new_pgoff = get_pgoff(newpage);
+ uint32_t byte_ofs16 = (((char *)object) - (char *)page) / 16;
+ uint32_t pgoff = (((char *)page) - stm_object_pages) / 4096;
- pagecopy(newpage, page);
- newpage->version = stm_transaction_version;
- newpage->modif_head = 0xff;
- newpage->pgoff = new_pgoff;
- assert(page->version != stm_transaction_version);
- assert(page->pgoff == old_pgoff);
+ page = fetch_thread_local_page(page);
- remap_file_pages((void *)page, 4096, 0, new_pgoff, MAP_PAGES_FLAGS);
+ uint32_t write_log_index = page->write_log_index_cache;
+ struct write_history_s *log = _STM_TL2.writes_by_this_transaction;
- assert(page->version == stm_transaction_version);
- assert(page->pgoff == new_pgoff);
+ if (write_log_index >= log->nb_updates ||
+ log->updates[write_log_index].pgoff != pgoff) {
+ /* make a new entry for this page in the write log */
+ write_log_index = log->nb_updates++;
+ assert(log->nb_updates <= _STM_TL2.nb_updates_max); // XXX resize
+ log->updates[write_log_index].pgoff = pgoff;
+ log->updates[write_log_index].bitmask[0] = 0;
+ log->updates[write_log_index].bitmask[1] = 0;
+ log->updates[write_log_index].bitmask[2] = 0;
+ log->updates[write_log_index].bitmask[3] = 0;
+ }
- struct write_history_s *cur = stm_local.writes_by_this_transaction;
- size_t history_size_max = 4096 - (((uintptr_t)cur) & 4095);
- if (sizeof(*cur) + (cur->nb_updates + 1) * 8 > history_size_max) {
- /* The buffer would overflow its page. Allocate a new one. */
- cur = _reserve_page_write_history();
- cur->previous_older_transaction =
- stm_local.writes_by_this_transaction;
- cur->transaction_version = stm_transaction_version;
- cur->nb_updates = 0;
- stm_local.writes_by_this_transaction = cur;
- }
- uint64_t i = cur->nb_updates++;
- cur->updates[i * 2 + 0] = get_local_index(page);
- cur->updates[i * 2 + 1] = new_pgoff;
- }
- object->modified = stm_transaction_version;
- object->modif_next = page->modif_head;
- page->modif_head = (uint8_t)(((uintptr_t)object) >> 4);
- assert(page->modif_head != 0xff);
+ assert(byte_ofs16 < 256);
+ log->updates[write_log_index].bitmask[byte_ofs16 / 64] |=
+ (1UL << (byte_ofs16 & 63));
}
+#if 0
char *_stm_alloc_next_page(size_t i)
{
struct page_header_s *newpage = _stm_reserve_page();
@@ -353,6 +325,7 @@
}
stm_set_read_marker_number(1);
}
+#endif
void stm_setup(void)
{
@@ -364,50 +337,115 @@
fprintf(stderr, "Cannot use more than 1<<32 pages of memory");
abort();
}
- char *stm_pages = mmap(NULL, NB_PAGES*4096ul, PROT_READ|PROT_WRITE,
- MAP_PAGES_FLAGS, -1, 0);
- if (stm_pages == MAP_FAILED) {
- perror("mmap stm_pages failed");
+
+ /* For now, just prepare to make the layout given at the start of
+ this file, with the RM pages interleaved with the L-0 blocks.
+ The actual L-0-RM pages are allocated by each thread. */
+ uint64_t addr_rm_base = (NB_PAGES + 1) * 4096UL;
+ uint64_t addr_object_pages = addr_rm_base << 4;
+
+ stm_object_pages = mmap((void *)addr_object_pages,
+ (NB_PAGES * 4096UL) * NB_THREADS,
+ PROT_READ | PROT_WRITE,
+ MAP_PAGES_FLAGS | MAP_FIXED, -1, 0);
+ if (stm_object_pages == MAP_FAILED) {
+ perror("mmap stm_object_pages failed");
abort();
}
- assert(sizeof(struct shared_descriptor_s) <= 4096);
- stm_shared_descriptor = (struct shared_descriptor_s *)stm_pages;
- stm_shared_descriptor->header.kind = PGKIND_SHARED_DESCRIPTOR;
- /* the page at index 0 contains the '*stm_shared_descriptor' structure */
- /* the page at index 1 is reserved for history_fast_forward() */
- stm_shared_descriptor->index_page_never_used = 2;
- stm_shared_descriptor->next_transaction_version = 2;
+ stm_shared_descriptor.index_page_never_used = 0;
}
void _stm_teardown(void)
{
- munmap((void *)stm_shared_descriptor, NB_PAGES*4096);
- stm_shared_descriptor = NULL;
+ munmap((void *)stm_object_pages, (NB_PAGES * 4096UL) * NB_THREADS);
+ stm_object_pages = NULL;
+ memset(&stm_shared_descriptor, 0, sizeof(stm_shared_descriptor));
}
-void stm_setup_process(void)
+static void set_gs_register(uint64_t value)
{
- memset(&stm_local, 0, sizeof(stm_local));
- stm_local.read_markers = mmap(NULL, NB_PAGES*(4096 >> 4) + 1,
- PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS,
- -1, 0);
- if (stm_local.read_markers == MAP_FAILED) {
- perror("mmap stm_read_markers failed");
+ int result = syscall(SYS_arch_prctl, ARCH_SET_GS, &value);
+ assert(result == 0);
+}
+
+static char *local_L0_pages(uint64_t gs_value)
+{
+ return (char *)(gs_value - 4096UL);
+}
+
+static char *local_RM_pages(uint64_t gs_value)
+{
+ return (char*)gs_value + (((uint64_t)stm_object_pages) >> 4);
+}
+
+int stm_setup_thread(void)
+{
+ int res;
+ int thnum = stm_next_thread_index;
+ int tries = 2 * NB_THREADS;
+ uint64_t gs_value;
+ while (1) {
+ thnum %= NB_THREADS;
+ stm_next_thread_index = thnum + 1;
+
+ if (!--tries) {
+ fprintf(stderr, "too many threads or too many non-fitting mmap\n");
+ abort();
+ }
+
+ gs_value = (thnum+1) * 4096UL * NB_PAGES;
+
+ if (mmap(local_L0_pages(gs_value), 2 * 4096UL, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) == MAP_FAILED) {
+ thnum++;
+ continue;
+ }
+
+ uint64_t nb_rm_pages = (NB_PAGES + 15) >> 4;
+ if (mmap(local_RM_pages(gs_value), nb_rm_pages * 4096UL,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) == MAP_FAILED) {
+ munmap(local_L0_pages(gs_value), 2 * 4096UL);
+ thnum++;
+ continue;
+ }
+ break;
+ }
+
+ res = mprotect(local_L0_pages(gs_value), 4096, PROT_READ | PROT_WRITE);
+ if (res < 0) {
+ perror("remap_file_pages in stm_setup_thread");
abort();
}
-
- assert((stm_set_read_marker_number(42),
- stm_get_read_marker_number() == 42));
- stm_set_read_marker_number(1);
+ res = remap_file_pages(stm_object_pages + gs_value, NB_PAGES * 4096UL, 0,
+ 0, MAP_PAGES_FLAGS);
+ if (res < 0) {
+ perror("remap_file_pages in stm_setup_thread");
+ abort();
+ }
+ set_gs_register(gs_value);
+ _STM_TL2.gs_value = gs_value;
+ _STM_TL1.read_marker = 1;
+ return thnum;
}
-void _stm_teardown_process(void)
+void _stm_restore_state_for_thread(int thread_num)
{
- munmap((void *)stm_local.read_markers, NB_PAGES*(4096 >> 4) + 1);
- memset(&stm_local, 0, sizeof(stm_local));
+ uint64_t gs_value = (thread_num + 1) * 4096UL * NB_PAGES;
+ set_gs_register(gs_value);
+ assert(_STM_TL2.gs_value == gs_value);
}
+void _stm_teardown_thread(void)
+{
+ uint64_t gs_value = _STM_TL2.gs_value;
+ uint64_t nb_rm_pages = (NB_PAGES + 15) >> 4;
+ munmap(local_RM_pages(gs_value), nb_rm_pages * 4096UL);
+ munmap(local_L0_pages(gs_value), 2 * 4096UL);
+ /* accessing _STM_TL2 is invalid here */
+}
+
+#if 0
static size_t get_obj_size_in_words(struct page_header_s *page)
{
size_t result = page->kind;
@@ -615,47 +653,4 @@
}
return !conflict;
}
-
-#ifdef STM_TESTS
-struct local_data_s *_stm_save_local_state(void)
-{
- uint64_t i, page_count = stm_shared_descriptor->index_page_never_used;
- uint32_t *pgoffs;
- struct local_data_s *p = malloc(sizeof(struct local_data_s) +
- page_count * sizeof(uint32_t));
- assert(p != NULL);
- memcpy(p, &stm_local, sizeof(stm_local));
- p->_current_read_markers = stm_current_read_markers;
- p->_transaction_version = stm_transaction_version;
-
- pgoffs = (uint32_t *)(p + 1);
- pgoffs[0] = page_count;
- for (i = 2; i < page_count; i++) {
- pgoffs[i] = get_pgoff(get_page_by_local_index(i));
- }
-
- return p;
-}
-
-void _stm_restore_local_state(struct local_data_s *p)
-{
- uint64_t i, page_count;
- uint32_t *pgoffs;
-
- remap_file_pages((void *)stm_shared_descriptor, 4096 * NB_PAGES,
- 0, 0, MAP_PAGES_FLAGS);
-
- pgoffs = (uint32_t *)(p + 1);
- page_count = pgoffs[0];
- for (i = 2; i < page_count; i++) {
- struct page_header_s *page = get_page_by_local_index(i);
- remap_file_pages((void *)page, 4096, 0, pgoffs[i], MAP_PAGES_FLAGS);
- assert(get_pgoff(page) == pgoffs[i]);
- }
-
- memcpy(&stm_local, p, sizeof(struct local_data_s));
- stm_current_read_markers = p->_current_read_markers;
- stm_transaction_version = p->_transaction_version;
- free(p);
-}
#endif
diff --git a/c5/core.h b/c5/core.h
--- a/c5/core.h
+++ b/c5/core.h
@@ -5,11 +5,11 @@
#define GCOBJECT __attribute__((address_space(256)))
+#define GCFLAG_WRITE_BARRIER 0x01
+
typedef GCOBJECT struct object_s {
    /* Every object starts with one such structure */
- uint16_t modified;
uint8_t flags;
- uint8_t reserved;
} object_t;
struct _read_marker_s {
@@ -20,43 +20,41 @@
};
typedef GCOBJECT struct _thread_local1_s {
- struct _read_marker_s *stm_current_read_markers;
- uint16_t stm_transaction_version; /* always EVEN */
+ uint8_t read_marker;
} _thread_local1_t;
#define _STM_TL1 (((_thread_local1_t *)0)[-1])
+#define _STM_CRM ((GCOBJECT struct _read_marker_s *)0)
+
/************************************************************/
void stm_setup(void);
-void stm_setup_process(void);
+int stm_setup_thread(void);
void stm_start_transaction(void);
_Bool stm_stop_transaction(void);
-struct object_s *stm_allocate(size_t size);
+object_t *stm_allocate(size_t size);
-static inline void stm_read(struct object_s *object)
+static inline void stm_read(object_t *object)
{
- struct _read_marker_s *crm = _STM_TL1.stm_current_read_markers;
- crm[((uintptr_t)object) >> 4].c = (unsigned char)(uintptr_t)crm;
+ _STM_CRM[((uintptr_t)object) >> 4].c = _STM_TL1.read_marker;
}
-void _stm_write_slowpath(struct object_s *);
+void _stm_write_barrier_slowpath(object_t *);
-static inline void stm_write(struct object_s *object)
+static inline void stm_write(object_t *object)
{
- uint16_t stv = _STM_TL1.stm_transaction_version;
- if (__builtin_expect(object->modified != stv, 0))
- _stm_write_slowpath(object);
+ if (__builtin_expect((object->flags & GCFLAG_WRITE_BARRIER) != 0, 0))
+ _stm_write_barrier_slowpath(object);
}
-_Bool _stm_was_read(struct object_s *object);
-_Bool _stm_was_written(struct object_s *object);
+_Bool _stm_was_read(object_t *object);
+_Bool _stm_was_written(object_t *object);
-struct local_data_s *_stm_save_local_state(void);
-void _stm_restore_local_state(struct local_data_s *p);
+void _stm_restore_state_for_thread(int thread_num);
void _stm_teardown(void);
-void _stm_teardown_process(void);
+void _stm_teardown_thread(void);
#endif
diff --git a/c5/demo1.c b/c5/demo1.c
--- a/c5/demo1.c
+++ b/c5/demo1.c
@@ -14,7 +14,7 @@
typedef struct {
- struct object_s header;
+ object_t header;
int val1, val2;
} obj_t;
@@ -66,7 +66,7 @@
static void *run_in_thread(void *arg)
{
- stm_setup_process();
+ stm_setup_thread();
do_run_in_thread((intptr_t)arg);
return NULL;
}
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit