Author: Remi Meier <[email protected]>
Branch: gc-small-uniform
Changeset: r1327:3aa3696e8add
Date: 2014-09-01 14:15 +0200
http://bitbucket.org/pypy/stmgc/changeset/3aa3696e8add/
Log: Somehow merge default
diff too long, truncating to 2000 out of 5104 lines
diff --git a/c7/TODO b/c7/TODO
--- a/c7/TODO
+++ b/c7/TODO
@@ -1,8 +1,6 @@
- use small uniform gcpages
-- write barrier for big arrays
-
- finalizers
- the highest_overflow_number can overflow after 2**30 non-collect-time
@@ -16,3 +14,16 @@
the unused pages away --- or maybe use consecutive addresses from the
lowest ones from segment N, instead of the page corresponding to the page
number in segment 0 (possibly a bit messy)
+
+- possibly messy too, but think about not using N+1 segments but only N
+
+- use a call/cc-style variant of setjmp/longjmp to avoid inevitable
+ transactions when we need to return
+
+- kill "atomic" and use regular lock elision
+
+- increase the memory limit, currently 2.5GB; this requires, apparently,
+ more fighting against LLVM bugs
+
+- avoid __builtin_frame_address(0) in precisely the performance-critical
+ functions like the interpreter main loop
diff --git a/c7/demo/demo2.c b/c7/demo/demo2.c
--- a/c7/demo/demo2.c
+++ b/c7/demo/demo2.c
@@ -43,7 +43,20 @@
n = (struct node_s*)obj;
visit((object_t **)&n->next);
}
-
+/* Card-marking callbacks.  This demo answers 0 from
+   stmcb_obj_supports_cards() and aborts if either of the two
+   card-specific callbacks below is ever called. */
+long stmcb_obj_supports_cards(struct object_s *obj)
+{
+ return 0;
+}
+void stmcb_get_card_base_itemsize(struct object_s *obj,
+ uintptr_t offset_itemsize[2])
+{
+ abort();
+}
+void stmcb_trace_cards(struct object_s *obj, void visit(object_t **),
+ uintptr_t start, uintptr_t stop)
+{
+ abort();
+}
void stmcb_commit_soon() {}
static void expand_marker(char *base, uintptr_t odd_number,
@@ -62,9 +75,8 @@
{
nodeptr_t r_n;
long prev, sum;
- stm_jmpbuf_t here;
- STM_START_TRANSACTION(&stm_thread_local, here);
+ stm_start_transaction(&stm_thread_local);
stm_read((objptr_t)global_chained_list);
r_n = global_chained_list;
@@ -92,11 +104,9 @@
nodeptr_t swap_nodes(nodeptr_t initial)
{
- stm_jmpbuf_t here;
-
assert(initial != NULL);
- STM_START_TRANSACTION(&stm_thread_local, here);
+ stm_start_transaction(&stm_thread_local);
if (stm_thread_local.longest_marker_state != 0) {
fprintf(stderr, "[%p] marker %d for %.6f seconds:\n",
@@ -193,7 +203,7 @@
stm_commit_transaction();
- stm_start_inevitable_transaction(&stm_thread_local);
+ stm_start_transaction(&stm_thread_local);
STM_POP_ROOT(stm_thread_local, global_chained_list); /* update value */
assert(global_chained_list->value == -1);
STM_PUSH_ROOT(stm_thread_local, global_chained_list); /* remains forever
in the shadow stack */
@@ -202,6 +212,11 @@
printf("setup ok\n");
}
+/* Pop one entry off this thread's shadow stack and discard it.
+   NOTE(review): presumably this removes the global_chained_list root
+   that the setup code leaves on the shadow stack -- confirm. */
+void teardown_list(void)
+{
+ STM_POP_ROOT_RET(stm_thread_local);
+}
+
static sem_t done;
@@ -215,7 +230,9 @@
void *demo2(void *arg)
{
int status;
+ rewind_jmp_buf rjbuf;
stm_register_thread_local(&stm_thread_local);
+ stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
char *org = (char *)stm_thread_local.shadowstack;
STM_PUSH_ROOT(stm_thread_local, global_chained_list); /* remains forever
in the shadow stack */
@@ -235,6 +252,7 @@
STM_POP_ROOT(stm_thread_local, global_chained_list);
OPT_ASSERT(org == (char *)stm_thread_local.shadowstack);
+ stm_rewind_jmp_leaveframe(&stm_thread_local, &rjbuf);
unregister_thread_local();
status = sem_post(&done); assert(status == 0);
return NULL;
@@ -271,11 +289,13 @@
int main(void)
{
int status, i;
+ rewind_jmp_buf rjbuf;
status = sem_init(&done, 0, 0); assert(status == 0);
stm_setup();
stm_register_thread_local(&stm_thread_local);
+ stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
stmcb_expand_marker = expand_marker;
@@ -292,9 +312,11 @@
final_check();
+ teardown_list();
+ stm_rewind_jmp_leaveframe(&stm_thread_local, &rjbuf);
unregister_thread_local();
- stm_teardown();
+ //stm_teardown();
return 0;
}
diff --git a/c7/demo/demo_largemalloc.c b/c7/demo/demo_largemalloc.c
--- a/c7/demo/demo_largemalloc.c
+++ b/c7/demo/demo_largemalloc.c
@@ -24,6 +24,18 @@
}
void stmcb_commit_soon() {}
+/* Card-marking callbacks.  This demo answers 0 from
+   stmcb_obj_supports_cards() and aborts if either of the two
+   card-specific callbacks below is ever called. */
+long stmcb_obj_supports_cards(struct object_s *obj)
+{
+ return 0;
+}
+void stmcb_trace_cards(struct object_s *obj, void cb(object_t **),
+ uintptr_t start, uintptr_t stop) {
+ abort();
+}
+void stmcb_get_card_base_itemsize(struct object_s *obj,
+ uintptr_t offset_itemsize[2]) {
+ abort();
+}
/************************************************************/
@@ -67,7 +79,7 @@
int i;
arena_data = malloc(ARENA_SIZE);
assert(arena_data != NULL);
- _stm_mutex_pages_lock();
+ //_stm_mutex_pages_lock();
for (i = 0; i < 25; i++)
timing(i);
return 0;
diff --git a/c7/demo/demo_random.c b/c7/demo/demo_random.c
--- a/c7/demo/demo_random.c
+++ b/c7/demo/demo_random.c
@@ -80,6 +80,18 @@
}
void stmcb_commit_soon() {}
+/* Card-marking callbacks.  This demo answers 0 from
+   stmcb_obj_supports_cards() and aborts if either of the two
+   card-specific callbacks below is ever called. */
+long stmcb_obj_supports_cards(struct object_s *obj)
+{
+ return 0;
+}
+void stmcb_trace_cards(struct object_s *obj, void cb(object_t **),
+ uintptr_t start, uintptr_t stop) {
+ abort();
+}
+void stmcb_get_card_base_itemsize(struct object_s *obj,
+ uintptr_t offset_itemsize[2]) {
+ abort();
+}
int get_rand(int max)
{
@@ -323,15 +335,15 @@
void *demo_random(void *arg)
{
int status;
+ rewind_jmp_buf rjbuf;
stm_register_thread_local(&stm_thread_local);
+ stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
setup_thread();
objptr_t p;
- stm_jmpbuf_t here;
- volatile int call_fork = (arg != NULL);
- STM_START_TRANSACTION(&stm_thread_local, here);
+ stm_start_transaction(&stm_thread_local);
assert(td.num_roots >= td.num_roots_at_transaction_start);
td.num_roots = td.num_roots_at_transaction_start;
p = NULL;
@@ -349,11 +361,12 @@
if (p == (objptr_t)-1) {
push_roots();
+ long call_fork = (arg != NULL && *(long *)arg);
if (call_fork == 0) { /* common case */
stm_commit_transaction();
td.num_roots_at_transaction_start = td.num_roots;
if (get_rand(100) < 98) {
- STM_START_TRANSACTION(&stm_thread_local, here);
+ stm_start_transaction(&stm_thread_local);
} else {
stm_start_inevitable_transaction(&stm_thread_local);
}
@@ -365,7 +378,7 @@
else {
/* run a fork() inside the transaction */
printf("========== FORK =========\n");
- call_fork = 0;
+ *(long*)arg = 0;
pid_t child = fork();
printf("=== in process %d thread %lx, fork() returned %d\n",
(int)getpid(), (long)pthread_self(), (int)child);
@@ -383,8 +396,19 @@
}
}
}
+ push_roots();
stm_commit_transaction();
+ /* even out the shadow stack before leaveframe: */
+ stm_start_inevitable_transaction(&stm_thread_local);
+ while (td.num_roots > 0) {
+ td.num_roots--;
+ objptr_t t;
+ STM_POP_ROOT(stm_thread_local, t);
+ }
+ stm_commit_transaction();
+
+ stm_rewind_jmp_leaveframe(&stm_thread_local, &rjbuf);
stm_unregister_thread_local(&stm_thread_local);
status = sem_post(&done); assert(status == 0);
@@ -433,6 +457,7 @@
int main(void)
{
int i, status;
+ rewind_jmp_buf rjbuf;
/* pick a random seed from the time in seconds.
A bit pointless for now... because the interleaving of the
@@ -446,6 +471,7 @@
stm_setup();
stm_register_thread_local(&stm_thread_local);
+ stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
setup_globals();
@@ -463,7 +489,7 @@
long forkbase = NUMTHREADS * THREAD_STARTS / (FORKS + 1);
long _fork = (thread_starts % forkbase) == 0;
thread_starts--;
- newthread(demo_random, (void *)_fork);
+ newthread(demo_random, &_fork);
}
}
@@ -483,6 +509,7 @@
printf("Test OK!\n");
+ stm_rewind_jmp_leaveframe(&stm_thread_local, &rjbuf);
stm_unregister_thread_local(&stm_thread_local);
stm_teardown();
diff --git a/c7/demo/demo_random2.c b/c7/demo/demo_random2.c
new file mode 100644
--- /dev/null
+++ b/c7/demo/demo_random2.c
@@ -0,0 +1,540 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "stmgc.h"
+
+#define NUMTHREADS 3
+#define STEPS_PER_THREAD 500
+#define THREAD_STARTS 1000 // how many restarts of threads
+#define PREBUILT_ROOTS 3
+#define FORKS 3
+
+#define ACTIVE_ROOTS_SET_SIZE 100 // max num of roots created/alive in one transaction
+#define MAX_ROOTS_ON_SS 1000 // max on shadow stack
+
+// SUPPORT
+struct node_s;
+typedef TLPREFIX struct node_s node_t;
+typedef node_t* nodeptr_t;
+typedef object_t* objptr_t;
+int num_forked_children = 0;
+
+struct node_s {
+ struct object_s hdr;
+ int sig;
+ long my_size;
+ long my_id;
+ long my_hash;
+ nodeptr_t next;
+};
+
+#define SIGNATURE 0x01234567
+
+
+static sem_t done;
+__thread stm_thread_local_t stm_thread_local;
+__thread void *thread_may_fork;
+
+// global and per-thread-data
+time_t default_seed;
+objptr_t prebuilt_roots[PREBUILT_ROOTS];
+
+struct thread_data {
+ unsigned int thread_seed;
+ int steps_left;
+ objptr_t active_roots_set[ACTIVE_ROOTS_SET_SIZE];
+ int active_roots_num;
+ long roots_on_ss;
+ long roots_on_ss_at_tr_start;
+};
+__thread struct thread_data td;
+
+/* Return the address of the calling thread's 'td' record.  Exists as a
+   real function so that it can be called from a gdb session. */
+struct thread_data *_get_td(void)
+{
+ return &td; /* for gdb */
+}
+
+
+/* Size callback: every node records its own allocation size in
+   'my_size', so simply report that field. */
+ssize_t stmcb_size_rounded_up(struct object_s *ob)
+{
+ return ((struct node_s*)ob)->my_size;
+}
+
+/* Trace callback: a node holds two references that must always be
+   equal -- the 'next' field and a copy kept in the last pointer-sized
+   slot of the object (at my_size - sizeof(void*)).  Both are passed to
+   'visit', and their equality is asserted before and after. */
+void stmcb_trace(struct object_s *obj, void visit(object_t **))
+{
+ struct node_s *n;
+ n = (struct node_s*)obj;
+
+ /* and the same value at the end: */
+ /* note, ->next may be the same as last_next */
+ nodeptr_t *last_next = (nodeptr_t*)((char*)n + n->my_size - sizeof(void*));
+
+ assert(n->next == *last_next);
+
+ visit((object_t **)&n->next);
+ visit((object_t **)last_next);
+
+ assert(n->next == *last_next);
+}
+
+void stmcb_commit_soon() {}
+
+/* Card-marking callbacks.  This demo answers 0 from
+   stmcb_obj_supports_cards() and aborts if either of the two
+   card-specific callbacks below is ever called. */
+long stmcb_obj_supports_cards(struct object_s *obj)
+{
+ return 0;
+}
+void stmcb_trace_cards(struct object_s *obj, void cb(object_t **),
+ uintptr_t start, uintptr_t stop) {
+ abort();
+}
+void stmcb_get_card_base_itemsize(struct object_s *obj,
+ uintptr_t offset_itemsize[2]) {
+ abort();
+}
+
+/* Return a pseudo-random number below 'max' (for positive 'max'),
+   drawn with rand_r() from the calling thread's own seed.
+   Returns 0 when 'max' is 0, avoiding a modulo by zero. */
+int get_rand(int max)
+{
+ if (max == 0)
+ return 0;
+ return (int)(rand_r(&td.thread_seed) % (unsigned int)max);
+}
+
+/* Pick a random root object from one of three sources, chosen with
+   get_rand(3):
+     0 -> an entry of the shadow stack (only if td.roots_on_ss > 0),
+     1 -> an entry of td.active_roots_set (only if it is non-empty),
+     otherwise, or when the chosen source is empty -> a prebuilt root. */
+objptr_t get_random_root()
+{
+ /* get some root from shadowstack or active_root_set or prebuilt_roots */
+ int num = get_rand(3);
+ intptr_t ss_size = td.roots_on_ss;
+ if (num == 0 && ss_size > 0) {
+ num = get_rand(ss_size);
+ /* XXX: impl detail: there is already a "-1" on the SS -> +1 */
+ objptr_t r = (objptr_t)stm_thread_local.shadowstack_base[num+1].ss;
+ OPT_ASSERT((((uintptr_t)r) & 3) == 0);
+ /* Hand out the shadow-stack entry.  Without this return the value
+    was dropped, and the overwritten 'num' then influenced the
+    choice between the two sources below. */
+ return r;
+ }
+
+ if (num == 1 && td.active_roots_num > 0) {
+ num = get_rand(td.active_roots_num);
+ return td.active_roots_set[num];
+ } else {
+ num = get_rand(PREBUILT_ROOTS);
+ return prebuilt_roots[num];
+ }
+}
+
+
+/* Move the active roots onto the shadow stack, from the last entry of
+   td.active_roots_set down to the first, leaving the set empty.
+   Entries that would exceed MAX_ROOTS_ON_SS are not pushed and are
+   therefore dropped.  Returns the number of roots actually pushed,
+   which is the count to hand to pop_roots() later. */
+long push_roots()
+{
+ int i;
+ long to_push = td.active_roots_num;
+ long not_pushed = 0;
+ for (i = to_push - 1; i >= 0; i--) {
+ td.active_roots_num--;
+ if (td.roots_on_ss < MAX_ROOTS_ON_SS) {
+ STM_PUSH_ROOT(stm_thread_local, td.active_roots_set[i]);
+ td.roots_on_ss++;
+ } else {
+ not_pushed++;
+ }
+ }
+ return to_push - not_pushed;
+}
+
+void add_root(objptr_t r);
+/* Pop 'to_pop' entries off the shadow stack and hand each one to
+   add_root(), decrementing td.roots_on_ss per entry. */
+void pop_roots(long to_pop)
+{
+ int i;
+ for (i = 0; i < to_pop; i++) {
+ objptr_t t;
+ STM_POP_ROOT(stm_thread_local, t);
+ add_root(t);
+ td.roots_on_ss--;
+ }
+}
+
+/* Remove entry 'idx' from td.active_roots_set by shifting all later
+   entries down one slot, then shrink the set by one. */
+void del_root(int idx)
+{
+ int i;
+
+ for (i = idx; i < td.active_roots_num - 1; i++)
+ td.active_roots_set[i] = td.active_roots_set[i + 1];
+ td.active_roots_num--;
+}
+
+/* Append 'r' to td.active_roots_set.  NULL is ignored, and so is any
+   root offered while the set already holds ACTIVE_ROOTS_SET_SIZE
+   entries. */
+void add_root(objptr_t r)
+{
+ if (r && td.active_roots_num < ACTIVE_ROOTS_SET_SIZE) {
+ td.active_roots_set[td.active_roots_num++] = r;
+ }
+}
+
+
+/* NULL-tolerant wrapper around stm_read(). */
+void read_barrier(objptr_t p)
+{
+ if (p != NULL) {
+ stm_read(p);
+ }
+}
+
+/* NULL-tolerant wrapper around stm_write(). */
+void write_barrier(objptr_t p)
+{
+ if (p != NULL) {
+ stm_write(p);
+ }
+}
+
+/* Store 'v' as the successor of node 'p'; does nothing when 'p' is
+   NULL.  The reference is written twice: into the 'next' field and
+   into the last pointer-sized slot of the object, which must always
+   mirror 'next' (asserted before the write). */
+void set_next(objptr_t p, objptr_t v)
+{
+ if (p != NULL) {
+ nodeptr_t n = (nodeptr_t)p;
+
+ /* and the same value at the end: */
+ nodeptr_t TLPREFIX *last_next = (nodeptr_t TLPREFIX *)((stm_char*)n + n->my_size - sizeof(void*));
+ assert(n->next == *last_next);
+ n->next = (nodeptr_t)v;
+ *last_next = (nodeptr_t)v;
+ }
+}
+
+/* Return the successor of node 'p' after checking that the 'next'
+   field and its mirror copy in the last pointer-sized slot of the
+   object agree.  'p' is dereferenced unconditionally, so it must not
+   be NULL. */
+nodeptr_t get_next(objptr_t p)
+{
+ nodeptr_t n = (nodeptr_t)p;
+
+ /* and the same value at the end: */
+ nodeptr_t TLPREFIX *last_next = (nodeptr_t TLPREFIX *)((stm_char*)n + n->my_size - sizeof(void*));
+ OPT_ASSERT(n->next == *last_next);
+
+ return n->next;
+}
+
+
+/* Perform one of ten randomly chosen elementary operations on the
+   current object 'p' and/or the randomly picked root '_r'.
+   Returns the (possibly replaced) current object. */
+objptr_t simple_events(objptr_t p, objptr_t _r)
+{
+ int k = get_rand(10);
+ long pushed;
+
+ switch (k) {
+ case 0: // remove a root
+ if (td.active_roots_num) {
+ del_root(get_rand(td.active_roots_num));
+ }
+ break;
+ case 1: // add 'p' to roots
+ add_root(p);
+ break;
+ case 2: // set 'p' to point to a root
+ if (_r)
+ p = _r;
+ break;
+ case 3: // allocate fresh 'p'
+ /* the active roots are parked on the shadow stack around the
+    allocation and restored afterwards */
+ pushed = push_roots();
+ size_t sizes[4] = {sizeof(struct node_s),
+ sizeof(struct node_s) + (get_rand(100000) & ~15),
+ sizeof(struct node_s) + 4096,
+ sizeof(struct node_s) + 4096*70};
+ size_t size = sizes[get_rand(4)];
+ p = stm_allocate(size);
+ ((nodeptr_t)p)->sig = SIGNATURE;
+ ((nodeptr_t)p)->my_size = size;
+ ((nodeptr_t)p)->my_id = 0;
+ ((nodeptr_t)p)->my_hash = 0;
+ pop_roots(pushed);
+ break;
+ case 4: // read and validate 'p'
+ read_barrier(p);
+ break;
+ case 5: // only do a stm_write_barrier
+ write_barrier(p);
+ break;
+ case 6: // follow p->next
+ if (p) {
+ read_barrier(p);
+ p = (objptr_t)(get_next(p));
+ }
+ break;
+ case 7: // set 'p' as *next in one of the roots
+ write_barrier(_r);
+ set_next(_r, p);
+ break;
+ case 8: // id checking
+ /* my_id == 0 means "not recorded yet": store stm_id(p) the first
+    time, afterwards check that it has not changed */
+ if (p) {
+ nodeptr_t n = (nodeptr_t)p;
+ if (n->my_id == 0) {
+ write_barrier(p);
+ n->my_id = stm_id(p);
+ }
+ else {
+ read_barrier(p);
+ assert(n->my_id == stm_id(p));
+ }
+ }
+ break;
+ case 9: // identity-hash checking, same scheme as case 8
+ if (p) {
+ nodeptr_t n = (nodeptr_t)p;
+ if (n->my_hash == 0) {
+ write_barrier(p);
+ n->my_hash = stm_identityhash(p);
+ }
+ else {
+ read_barrier(p);
+ assert(n->my_hash == stm_identityhash(p));
+ }
+ }
+ break;
+ }
+ return p;
+}
+
+void frame_loop();
+/* Execute one random step on the current object 'p'.
+   With k = get_rand(12), values below 10 run simple_events().
+   Otherwise the rarer events below are tried in order, each guarded by
+   its own random draw: commit and start a new transaction, run a
+   nested frame_loop(), become inevitable, request a fork, or become
+   the globally unique transaction.  If no draw succeeds, nothing
+   happens.
+   Returns the new current object: NULL after the events that park the
+   roots on the shadow stack, or (objptr_t)-1 to ask the caller to
+   consider forking. */
+objptr_t do_step(objptr_t p)
+{
+ objptr_t _r;
+ int k;
+
+ _r = get_random_root();
+ k = get_rand(12);
+
+ if (k < 10) {
+ p = simple_events(p, _r);
+ } else if (get_rand(20) == 1) {
+ long pushed = push_roots();
+ stm_commit_transaction();
+ td.roots_on_ss_at_tr_start = td.roots_on_ss;
+
+ if (get_rand(100) < 98) {
+ stm_start_transaction(&stm_thread_local);
+ } else {
+ stm_start_inevitable_transaction(&stm_thread_local);
+ }
+ /* reset the bookkeeping to the state recorded just before the
+    transaction started */
+ td.roots_on_ss = td.roots_on_ss_at_tr_start;
+ td.active_roots_num = 0;
+ pop_roots(pushed);
+ p = NULL;
+ } else if (get_rand(10) == 1) {
+ long pushed = push_roots();
+ /* leaving our frame */
+ frame_loop();
+ /* back in our frame */
+ pop_roots(pushed);
+ p = NULL;
+ } else if (get_rand(20) == 1) {
+ long pushed = push_roots();
+ stm_become_inevitable(&stm_thread_local, "please");
+ assert(stm_is_inevitable());
+ pop_roots(pushed);
+ p= NULL;
+ } else if (get_rand(20) == 1) {
+ p = (objptr_t)-1; // possibly fork
+ } else if (get_rand(20) == 1) {
+ long pushed = push_roots();
+ stm_become_globally_unique_transaction(&stm_thread_local, "really");
+ fprintf(stderr, "[GUT/%d]", (int)STM_SEGMENT->segment_num);
+ pop_roots(pushed);
+ p = NULL;
+ }
+ return p;
+}
+
+/* One "application frame" of the simulated interpreter.
+   Registers a rewind_jmp_buf for this C frame, then runs do_step()
+   repeatedly until td.steps_left is used up or a 1-in-10 random draw
+   ends the frame.  When do_step() returns (objptr_t)-1 the thread may
+   fork, but only if its 'thread_may_fork' flag is set.
+   The number of roots on the shadow stack must be the same on exit
+   as on entry. */
+void frame_loop()
+{
+ objptr_t p = NULL;
+ rewind_jmp_buf rjbuf;
+
+ stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
+ //fprintf(stderr,"%p F: %p\n", STM_SEGMENT->running_thread, __builtin_frame_address(0));
+
+ long roots_on_ss = td.roots_on_ss;
+ /* "interpreter main loop": this is one "application-frame" */
+ while (td.steps_left-->0 && get_rand(10) != 0) {
+ if (td.steps_left % 8 == 0)
+ fprintf(stdout, "#");
+
+ assert(p == NULL || ((nodeptr_t)p)->sig == SIGNATURE);
+
+ p = do_step(p);
+
+
+ if (p == (objptr_t)-1) {
+ p = NULL;
+
+ long call_fork = (thread_may_fork != NULL && *(long *)thread_may_fork);
+ if (call_fork) { /* rare case: the flag is cleared below before forking */
+ long pushed = push_roots();
+ /* run a fork() inside the transaction */
+ printf("========== FORK =========\n");
+ *(long*)thread_may_fork = 0;
+ pid_t child = fork();
+ printf("=== in process %d thread %lx, fork() returned %d\n",
+ (int)getpid(), (long)pthread_self(), (int)child);
+ if (child == -1) {
+ fprintf(stderr, "fork() error: %m\n");
+ abort();
+ }
+ /* the parent counts its children; the child starts from zero */
+ if (child != 0)
+ num_forked_children++;
+ else
+ num_forked_children = 0;
+
+ pop_roots(pushed);
+ }
+ }
+ }
+ OPT_ASSERT(roots_on_ss == td.roots_on_ss);
+
+ stm_rewind_jmp_leaveframe(&stm_thread_local, &rjbuf);
+}
+
+
+
+/* Reset the calling thread's 'td' record: zero it, give it a seed taken
+   from the global 'default_seed' (which is incremented for the next
+   thread) and a budget of STEPS_PER_THREAD steps.
+   NOTE(review): 'default_seed' is a plain global that is incremented
+   here without any locking -- confirm this is acceptable when several
+   threads start at once. */
+void setup_thread()
+{
+ memset(&td, 0, sizeof(struct thread_data));
+
+ /* stupid check because gdb shows garbage
+ in td.roots: */
+ int i;
+ for (i = 0; i < ACTIVE_ROOTS_SET_SIZE; i++)
+ assert(td.active_roots_set[i] == NULL);
+
+ td.thread_seed = default_seed++;
+ td.steps_left = STEPS_PER_THREAD;
+ td.active_roots_num = 0;
+ td.roots_on_ss = 0;
+ td.roots_on_ss_at_tr_start = 0;
+}
+
+
+
+/* Thread entry point.  Registers the thread with the STM library,
+   starts a transaction and runs frame_loop() until the step budget is
+   exhausted, then commits, unregisters and posts the 'done' semaphore.
+   'arg' is either NULL or a pointer to a long flag; it is stored in
+   'thread_may_fork' and consulted by frame_loop() to decide whether
+   this thread may fork.  Always returns NULL. */
+void *demo_random(void *arg)
+{
+ int status;
+ rewind_jmp_buf rjbuf;
+ stm_register_thread_local(&stm_thread_local);
+ stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
+
+ setup_thread();
+
+ td.roots_on_ss_at_tr_start = 0;
+ stm_start_transaction(&stm_thread_local);
+ td.roots_on_ss = td.roots_on_ss_at_tr_start;
+ td.active_roots_num = 0;
+
+ thread_may_fork = arg;
+ while (td.steps_left-->0) {
+ frame_loop();
+ }
+
+ stm_commit_transaction();
+
+ stm_rewind_jmp_leaveframe(&stm_thread_local, &rjbuf);
+ stm_unregister_thread_local(&stm_thread_local);
+
+ status = sem_post(&done); assert(status == 0);
+ return NULL;
+}
+
+/* Start a detached thread running func(arg); aborts the process if
+   pthread_create() fails.  'arg' is handed to the thread as-is, so
+   whatever it points to must stay valid for as long as the thread
+   uses it. */
+void newthread(void*(*func)(void*), void *arg)
+{
+ pthread_t th;
+ int status = pthread_create(&th, NULL, func, arg);
+ if (status != 0)
+ abort();
+ pthread_detach(th);
+ printf("started new thread\n");
+}
+
+
+/* Create the PREBUILT_ROOTS prebuilt objects inside one inevitable
+   transaction.  Each one is made from a heap copy of a template node
+   passed to stm_setup_prebuilt(); every second one (even index) also
+   gets a fixed identity hash of i + 5, recorded in 'my_hash'.
+   Aborts if the template copy cannot be allocated.
+   NOTE(review): the heap copy is never freed here -- confirm whether
+   stm_setup_prebuilt() still refers to it afterwards before changing
+   that. */
+void setup_globals()
+{
+ int i;
+
+ struct node_s prebuilt_template = {
+ .sig = SIGNATURE,
+ .my_size = sizeof(struct node_s),
+ .my_id = 0,
+ .my_hash = 0,
+ .next = NULL
+ };
+
+ stm_start_inevitable_transaction(&stm_thread_local);
+ for (i = 0; i < PREBUILT_ROOTS; i++) {
+ void* new_templ = malloc(sizeof(struct node_s));
+ /* same out-of-resources policy as newthread(): give up at once */
+ if (new_templ == NULL)
+ abort();
+ memcpy(new_templ, &prebuilt_template, sizeof(struct node_s));
+ prebuilt_roots[i] = stm_setup_prebuilt((objptr_t)(long)new_templ);
+
+ if (i % 2 == 0) {
+ int hash = i + 5;
+ stm_set_prebuilt_identityhash(prebuilt_roots[i],
+ hash);
+ ((nodeptr_t)prebuilt_roots[i])->my_hash = hash;
+ }
+ }
+ stm_commit_transaction();
+}
+
+/* Driver: starts NUMTHREADS worker threads, then starts a replacement
+   each time one finishes until NUMTHREADS * THREAD_STARTS threads have
+   run in total.  Some of the replacement threads are given a flag
+   that allows them to fork; forked children are reaped at the end.
+   Returns 0 on success, 1 if a child did not exit with status 0. */
+int main(void)
+{
+ int i, status;
+ rewind_jmp_buf rjbuf;
+ /* One "may fork" flag per replacement thread.  The flags are read and
+    written by the detached threads long after the loop iteration that
+    started them, so they need static storage: the address of a
+    block-local variable would dangle and would be shared by all
+    threads. */
+ static long fork_flags[NUMTHREADS * THREAD_STARTS];
+
+ /* pick a random seed from the time in seconds.
+ A bit pointless for now... because the interleaving of the
+ threads is really random. */
+ default_seed = time(NULL);
+ printf("running with seed=%lld\n", (long long)default_seed);
+
+ status = sem_init(&done, 0, 0);
+ assert(status == 0);
+
+
+ stm_setup();
+ stm_register_thread_local(&stm_thread_local);
+ stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
+
+ setup_globals();
+
+ int thread_starts = NUMTHREADS * THREAD_STARTS;
+ for (i = 0; i < NUMTHREADS; i++) {
+ newthread(demo_random, NULL);
+ thread_starts--;
+ }
+
+ for (i=0; i < NUMTHREADS * THREAD_STARTS; i++) {
+ status = sem_wait(&done);
+ assert(status == 0);
+ printf("thread finished\n");
+ if (thread_starts) {
+ long forkbase = NUMTHREADS * THREAD_STARTS / (FORKS + 1);
+ /* 'i' is below NUMTHREADS * THREAD_STARTS, so each thread gets
+    its own slot */
+ fork_flags[i] = (thread_starts % forkbase) == 0;
+ thread_starts--;
+ newthread(demo_random, &fork_flags[i]);
+ }
+ }
+
+ for (i = 0; i < num_forked_children; i++) {
+ pid_t child = wait(&status);
+ if (child == -1)
+ perror("wait");
+ printf("From %d: child %d terminated with exit status %d\n",
+ (int)getpid(), (int)child, status);
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ ;
+ else {
+ printf("*** error from the child ***\n");
+ return 1;
+ }
+ }
+
+ printf("Test OK!\n");
+
+ stm_rewind_jmp_leaveframe(&stm_thread_local, &rjbuf);
+ stm_unregister_thread_local(&stm_thread_local);
+ stm_teardown();
+
+ return 0;
+}
diff --git a/c7/demo/demo_simple.c b/c7/demo/demo_simple.c
--- a/c7/demo/demo_simple.c
+++ b/c7/demo/demo_simple.c
@@ -10,7 +10,7 @@
# include "stmgc.h"
#endif
-#define ITERS 1000000
+#define ITERS 100000
#define NTHREADS 2
@@ -38,27 +38,40 @@
n = (struct node_s*)obj;
visit((object_t **)&n->next);
}
-
+/* Card-marking callbacks.  This demo answers 0 from
+   stmcb_obj_supports_cards() and aborts if either of the two
+   card-specific callbacks below is ever called. */
+long stmcb_obj_supports_cards(struct object_s *obj)
+{
+ return 0;
+}
void stmcb_commit_soon() {}
+void stmcb_trace_cards(struct object_s *obj, void cb(object_t **),
+ uintptr_t start, uintptr_t stop) {
+ abort();
+}
+void stmcb_get_card_base_itemsize(struct object_s *obj,
+ uintptr_t offset_itemsize[2]) {
+ abort();
+}
static sem_t done;
static __thread int tl_counter = 0;
-static int gl_counter = 0;
+//static int gl_counter = 0;
void *demo2(void *arg)
{
int status;
+ rewind_jmp_buf rjbuf;
stm_register_thread_local(&stm_thread_local);
+ stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
char *org = (char *)stm_thread_local.shadowstack;
tl_counter = 0;
object_t *tmp;
int i = 0;
while (i < ITERS) {
- stm_start_inevitable_transaction(&stm_thread_local);
+ stm_start_transaction(&stm_thread_local);
tl_counter++;
if (i % 500 < 250)
STM_PUSH_ROOT(stm_thread_local, stm_allocate(16));//gl_counter++;
@@ -68,8 +81,9 @@
i++;
}
- assert(org == (char *)stm_thread_local.shadowstack);
+ OPT_ASSERT(org == (char *)stm_thread_local.shadowstack);
+ stm_rewind_jmp_leaveframe(&stm_thread_local, &rjbuf);
stm_unregister_thread_local(&stm_thread_local);
status = sem_post(&done); assert(status == 0);
return NULL;
diff --git a/c7/demo/test_shadowstack.c b/c7/demo/test_shadowstack.c
new file mode 100644
--- /dev/null
+++ b/c7/demo/test_shadowstack.c
@@ -0,0 +1,74 @@
+#include <stdlib.h>
+#include <assert.h>
+#include "stmgc.h"
+
+stm_thread_local_t stm_thread_local;
+
+typedef TLPREFIX struct node_s node_t;
+
+struct node_s {
+ struct object_s hdr;
+ long value;
+};
+
+/* Size callback: every object in this test is a struct node_s. */
+ssize_t stmcb_size_rounded_up(struct object_s *ob)
+{
+ return sizeof(struct node_s);
+}
+/* Trace callback: struct node_s holds no object references, so there
+   is nothing to visit. */
+void stmcb_trace(struct object_s *obj, void visit(object_t **))
+{
+}
+/* Card-marking callbacks.  This test answers 0 from
+   stmcb_obj_supports_cards() and aborts if either of the two
+   card-specific callbacks below is ever called. */
+long stmcb_obj_supports_cards(struct object_s *obj)
+{
+ return 0;
+}
+void stmcb_get_card_base_itemsize(struct object_s *obj,
+ uintptr_t offset_itemsize[2])
+{
+ abort();
+}
+void stmcb_trace_cards(struct object_s *obj, void visit(object_t **),
+ uintptr_t start, uintptr_t stop)
+{
+ abort();
+}
+void stmcb_commit_soon() {}
+
+
+/* Shadow-stack test.  A node and an odd marker value are pushed in one
+   transaction; in a second transaction both are popped again, and the
+   node's contents are checked across two stm_collect() calls and a
+   further allocation.
+   NOTE(review): the second transaction is never committed and there is
+   no leaveframe/unregister/teardown before returning -- presumably
+   fine for a short-lived test process, confirm. */
+int main(void)
+{
+ rewind_jmp_buf rjbuf;
+
+ stm_setup();
+ stm_register_thread_local(&stm_thread_local);
+ stm_rewind_jmp_enterframe(&stm_thread_local, &rjbuf);
+
+ stm_start_transaction(&stm_thread_local);
+ node_t *node = (node_t *)stm_allocate(sizeof(struct node_s));
+ node->value = 129821;
+ STM_PUSH_ROOT(stm_thread_local, node);
+ STM_PUSH_ROOT(stm_thread_local, 333); /* odd value */
+ stm_commit_transaction();
+
+ /* now in a new transaction, pop the node off the shadowstack, but
+ then do a major collection. It should still be found by the
+ tracing logic. */
+ stm_start_transaction(&stm_thread_local);
+ STM_POP_ROOT_RET(stm_thread_local);
+ STM_POP_ROOT(stm_thread_local, node);
+ assert(node->value == 129821);
+ STM_PUSH_ROOT(stm_thread_local, NULL);
+ stm_collect(9);
+
+ /* a fresh allocation must not reuse the popped node */
+ node_t *node2 = (node_t *)stm_allocate(sizeof(struct node_s));
+ assert(node2 != node);
+ assert(node->value == 129821);
+
+ STM_PUSH_ROOT(stm_thread_local, node2);
+ stm_collect(0);
+ STM_POP_ROOT(stm_thread_local, node2);
+ assert(node2 != node);
+ assert(node->value == 129821);
+
+ return 0;
+}
diff --git a/c7/stm/contention.c b/c7/stm/contention.c
--- a/c7/stm/contention.c
+++ b/c7/stm/contention.c
@@ -98,13 +98,14 @@
/************************************************************/
-static void contention_management(uint8_t other_segment_num,
+static bool contention_management(uint8_t other_segment_num,
enum contention_kind_e kind,
object_t *obj)
{
assert(_has_mutex());
assert(other_segment_num != STM_SEGMENT->segment_num);
+ bool others_may_have_run = false;
if (must_abort())
abort_with_mutex();
@@ -152,6 +153,7 @@
if (contmgr.try_sleep && kind != WRITE_WRITE_CONTENTION &&
contmgr.other_pseg->safe_point != SP_WAIT_FOR_C_TRANSACTION_DONE) {
+ others_may_have_run = true;
/* Sleep.
- Not for write-write contentions, because we're not at a
@@ -192,7 +194,7 @@
/* tell the other to commit ASAP, since it causes aborts */
signal_other_to_commit_soon(contmgr.other_pseg);
- dprintf(("abort in contention\n"));
+ dprintf(("abort in contention: kind %d\n", kind));
STM_SEGMENT->nursery_end = abort_category;
marker_contention(kind, false, other_segment_num, obj);
abort_with_mutex();
@@ -225,6 +227,7 @@
if (must_abort())
abort_with_mutex();
+ others_may_have_run = true;
dprintf(("contention: wait C_ABORTED...\n"));
cond_wait(C_ABORTED);
dprintf(("contention: done\n"));
@@ -278,6 +281,7 @@
stmcb_commit_soon();
}
}
+ return others_may_have_run;
}
static void write_write_contention_management(uintptr_t lock_idx,
@@ -301,10 +305,10 @@
s_mutex_unlock();
}
-static void write_read_contention_management(uint8_t other_segment_num,
+static bool write_read_contention_management(uint8_t other_segment_num,
object_t *obj)
{
- contention_management(other_segment_num, WRITE_READ_CONTENTION, obj);
+ return contention_management(other_segment_num, WRITE_READ_CONTENTION,
obj);
}
static void inevitable_contention_management(uint8_t other_segment_num)
diff --git a/c7/stm/contention.h b/c7/stm/contention.h
--- a/c7/stm/contention.h
+++ b/c7/stm/contention.h
@@ -1,7 +1,7 @@
static void write_write_contention_management(uintptr_t lock_idx,
object_t *obj);
-static void write_read_contention_management(uint8_t other_segment_num,
+static bool write_read_contention_management(uint8_t other_segment_num,
object_t *obj);
static void inevitable_contention_management(uint8_t other_segment_num);
diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -40,26 +40,67 @@
#endif
}
-void _stm_write_slowpath(object_t *obj)
+/* Write-barrier slow path for an overflow object.
+   obj:       the object being written to; must have
+              GCFLAG_WRITE_BARRIER set.
+   mark_card: false -> the whole object is recorded in
+              'objects_pointing_to_nursery' and its WRITE_BARRIER and
+              CARDS_SET flags are cleared (resetting its cards first if
+              CARDS_SET was set);
+              true  -> WRITE_BARRIER is kept, CARDS_SET is added and the
+              object is recorded in 'old_objects_with_cards'. */
+__attribute__((always_inline))
+static void write_slowpath_overflow_obj(object_t *obj, bool mark_card)
+{
+ /* An overflow object is an object from the same transaction, but
+ outside the nursery. More precisely, it is no longer young,
+ i.e. it comes from before the most recent minor collection.
+ */
+ assert(STM_PSEGMENT->objects_pointing_to_nursery != NULL);
+
+ assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+ if (!mark_card) {
+ /* The basic case, with no card marking. We append the object
+ into 'objects_pointing_to_nursery', and remove the flag so
+ that the write_slowpath will not be called again until the
+ next minor collection. */
+ if (obj->stm_flags & GCFLAG_CARDS_SET) {
+ /* if we clear this flag, we also need to clear the cards */
+ _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+ obj, CARD_CLEAR, false);
+ }
+ obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET);
+ LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+ }
+ else {
+ /* Card marking. Don't remove GCFLAG_WRITE_BARRIER because we
+ need to come back to _stm_write_slowpath_card() for every
+ card to mark. Add GCFLAG_CARDS_SET. */
+ assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+ obj->stm_flags |= GCFLAG_CARDS_SET;
+ assert(STM_PSEGMENT->old_objects_with_cards);
+ LIST_APPEND(STM_PSEGMENT->old_objects_with_cards, obj);
+ }
+}
+
+__attribute__((always_inline))
+static void write_slowpath_common(object_t *obj, bool mark_card)
{
assert(_seems_to_be_running_transaction());
assert(!_is_young(obj));
assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
- /* is this an object from the same transaction, outside the nursery? */
- if ((obj->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) ==
- STM_PSEGMENT->overflow_number) {
+ uintptr_t base_lock_idx = get_write_lock_idx((uintptr_t)obj);
- dprintf_test(("write_slowpath %p -> ovf obj_to_nurs\n", obj));
- obj->stm_flags &= ~GCFLAG_WRITE_BARRIER;
- assert(STM_PSEGMENT->objects_pointing_to_nursery != NULL);
- LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+ if (IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)) {
+ assert(write_locks[base_lock_idx] == 0);
+ write_slowpath_overflow_obj(obj, mark_card);
return;
}
+ /* Else, it's an old object and we need to privatise it.
+ Do a read-barrier now. Note that this must occur before the
+ safepoints that may be issued in write_write_contention_management().
+ */
+ stm_read(obj);
- /* do a read-barrier now. Note that this must occur before the
- safepoints that may be issued in write_write_contention_management(). */
- stm_read(obj);
+ /* Take the segment's own lock number */
+ uint8_t lock_num = STM_PSEGMENT->write_lock_num;
+
+ /* If CARDS_SET, we entered here at least once already, so we
+ already own the write_lock */
+ assert(IMPLY(obj->stm_flags & GCFLAG_CARDS_SET,
+ write_locks[base_lock_idx] == lock_num));
/* XXX XXX XXX make the logic of write-locking objects optional! */
@@ -68,16 +109,14 @@
'modified_old_objects' (but, because it had GCFLAG_WRITE_BARRIER,
not in 'objects_pointing_to_nursery'). We'll detect this case
by finding that we already own the write-lock. */
- uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START;
- uint8_t lock_num = STM_PSEGMENT->write_lock_num;
- assert(lock_idx < sizeof(write_locks));
+
retry:
- if (write_locks[lock_idx] == 0) {
+ if (write_locks[base_lock_idx] == 0) {
/* A lock to prevent reading garbage from
lookup_other_thread_recorded_marker() */
acquire_marker_lock(STM_SEGMENT->segment_base);
- if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[lock_idx],
+ if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[base_lock_idx],
0, lock_num))) {
release_marker_lock(STM_SEGMENT->segment_base);
goto retry;
@@ -119,16 +158,15 @@
realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
- /* that's the page *following* the last page with the object */
- end_page = (((uintptr_t)obj) + obj_size + 4095) / 4096UL;
+ /* get the last page containing data from the object */
+ end_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL;
- for (i = first_page; i < end_page; i++) {
+ for (i = first_page; i <= end_page; i++) {
page_privatize(i);
}
}
}
- else if (write_locks[lock_idx] == lock_num) {
- OPT_ASSERT(STM_PSEGMENT->objects_pointing_to_nursery != NULL);
+ else if (write_locks[base_lock_idx] == lock_num) {
#ifdef STM_TESTS
bool found = false;
LIST_FOREACH_R(STM_PSEGMENT->modified_old_objects, object_t *,
@@ -139,17 +177,10 @@
else {
/* call the contention manager, and then retry (unless we were
aborted). */
- write_write_contention_management(lock_idx, obj);
+ write_write_contention_management(base_lock_idx, obj);
goto retry;
}
- /* A common case for write_locks[] that was either 0 or lock_num:
- we need to add the object to 'objects_pointing_to_nursery'
- if there is such a list. */
- if (STM_PSEGMENT->objects_pointing_to_nursery != NULL) {
- dprintf_test(("write_slowpath %p -> old obj_to_nurs\n", obj));
- LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
- }
/* check that we really have a private page */
assert(is_private_page(STM_SEGMENT->segment_num,
@@ -158,16 +189,125 @@
/* check that so far all copies of the object have the flag */
check_flag_write_barrier(obj);
- /* remove GCFLAG_WRITE_BARRIER, but only if we succeeded in
- getting the write-lock */
assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
- obj->stm_flags &= ~GCFLAG_WRITE_BARRIER;
+ if (!mark_card) {
+ /* A common case for write_locks[] that was either 0 or lock_num:
+ we need to add the object to the appropriate list if there is one.
+ */
+ if (STM_PSEGMENT->objects_pointing_to_nursery != NULL) {
+ dprintf_test(("write_slowpath %p -> old obj_to_nurs\n", obj));
+ LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj);
+ }
+
+ if (obj->stm_flags & GCFLAG_CARDS_SET) {
+ /* if we clear this flag, we have to tell sync_old_objs that
+ everything needs to be synced */
+ _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+ obj, CARD_MARKED_OLD, true); /* mark all */
+ }
+
+ /* remove GCFLAG_WRITE_BARRIER if we succeeded in getting the base
+ write-lock (not for card marking). */
+ obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET);
+ }
+ else {
+ /* don't remove WRITE_BARRIER, but add CARDS_SET */
+ obj->stm_flags |= GCFLAG_CARDS_SET;
+ assert(STM_PSEGMENT->old_objects_with_cards);
+ LIST_APPEND(STM_PSEGMENT->old_objects_with_cards, obj);
+ }
/* for sanity, check again that all other segment copies of this
object still have the flag (so privatization worked) */
check_flag_write_barrier(obj);
}
+void _stm_write_slowpath(object_t *obj)
+{
+ write_slowpath_common(obj, /*mark_card=*/false);
+}
+
+static bool obj_should_use_cards(object_t *obj)
+{
+ struct object_s *realobj = (struct object_s *)
+ REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+ long supports = stmcb_obj_supports_cards(realobj);
+ if (!supports)
+ return 0;
+
+ /* check also if it makes sense: */
+ size_t size = stmcb_size_rounded_up(realobj);
+ return (size >= _STM_MIN_CARD_OBJ_SIZE);
+}
+
+char _stm_write_slowpath_card_extra(object_t *obj)
+{
+ /* the PyPy JIT calls this function directly if it finds that an
+ array doesn't have the GCFLAG_CARDS_SET */
+ bool mark_card = obj_should_use_cards(obj);
+ write_slowpath_common(obj, mark_card);
+ return mark_card;
+}
+
+long _stm_write_slowpath_card_extra_base(void)
+{
+ /* for the PyPy JIT: _stm_write_slowpath_card_extra_base[obj >> 4]
+ is the byte that must be set to CARD_MARKED. The logic below
+ does the same, but more explicitly. */
+ return (((long)write_locks) - WRITELOCK_START + 1)
+ + 0x4000000000000000L; // <- workaround for a clang bug :-(
+}
+
+void _stm_write_slowpath_card(object_t *obj, uintptr_t index)
+{
+ /* If CARDS_SET is not set so far, issue a normal write barrier.
+ If the object is large enough, ask it to set up the object for
+ card marking instead.
+ */
+ if (!(obj->stm_flags & GCFLAG_CARDS_SET)) {
+ char mark_card = _stm_write_slowpath_card_extra(obj);
+ if (!mark_card)
+ return;
+ }
+
+ dprintf_test(("write_slowpath_card %p -> index:%lu\n",
+ obj, index));
+
+ /* We reach this point if we have to mark the card.
+ */
+ assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+ assert(obj->stm_flags & GCFLAG_CARDS_SET);
+ assert(!(obj->stm_flags & GCFLAG_SMALL_UNIFORM)); /* not supported/tested */
+
+#ifndef NDEBUG
+ struct object_s *realobj = (struct object_s *)
+ REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+ size_t size = stmcb_size_rounded_up(realobj);
+ /* we need at least one lock in addition to the STM-reserved object
+ write-lock */
+ assert(size >= 32);
+ /* the 'index' must be in range(length-of-obj), but we don't have
+ a direct way to know the length. We know that it is smaller
+ than the size in bytes. */
+ assert(index < size);
+#endif
+
+ /* Write into the card's lock. This is used by the next minor
+ collection to know what parts of the big object may have changed.
+ We already own the object here or it is an overflow obj. */
+ uintptr_t base_lock_idx = get_write_lock_idx((uintptr_t)obj);
+ uintptr_t card_lock_idx = base_lock_idx + get_index_to_card_index(index);
+ write_locks[card_lock_idx] = CARD_MARKED;
+
+ /* More debug checks */
+ dprintf(("mark %p index %lu, card:%lu with %d\n",
+ obj, index, get_index_to_card_index(index), CARD_MARKED));
+ assert(IMPLY(IS_OVERFLOW_OBJ(STM_PSEGMENT, obj),
+ write_locks[base_lock_idx] == 0));
+ assert(IMPLY(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj),
+ write_locks[base_lock_idx] == STM_PSEGMENT->write_lock_num));
+}
+
static void reset_transaction_read_version(void)
{
/* force-reset all read markers to 0 */
@@ -188,14 +328,12 @@
STM_SEGMENT->transaction_read_version = 1;
}
-void _stm_start_transaction(stm_thread_local_t *tl, stm_jmpbuf_t *jmpbuf)
+static void _stm_start_transaction(stm_thread_local_t *tl, bool inevitable)
{
assert(!_stm_in_transaction(tl));
- s_mutex_lock();
-
retry:
- if (jmpbuf == NULL) {
+ if (inevitable) {
wait_for_end_of_inevitable_transaction(tl);
}
@@ -209,14 +347,10 @@
STM_PSEGMENT->start_time = tl->_timing_cur_start;
STM_PSEGMENT->signalled_to_commit_soon = false;
STM_PSEGMENT->safe_point = SP_RUNNING;
-#ifndef NDEBUG
- STM_PSEGMENT->marker_inev[1] = 99999999999999999L;
-#endif
- if (jmpbuf == NULL)
+ STM_PSEGMENT->marker_inev[1] = 0;
+ if (inevitable)
marker_fetch_inev();
- STM_PSEGMENT->transaction_state = (jmpbuf != NULL ? TS_REGULAR
- : TS_INEVITABLE);
- STM_SEGMENT->jmpbuf_ptr = jmpbuf;
+ STM_PSEGMENT->transaction_state = (inevitable ? TS_INEVITABLE : TS_REGULAR);
#ifndef NDEBUG
STM_PSEGMENT->running_pthread = pthread_self();
#endif
@@ -245,7 +379,8 @@
assert(list_is_empty(STM_PSEGMENT->young_weakrefs));
assert(tree_is_cleared(STM_PSEGMENT->young_outside_nursery));
assert(tree_is_cleared(STM_PSEGMENT->nursery_objects_shadows));
- assert(tree_is_cleared(STM_PSEGMENT->callbacks_on_abort));
+ assert(tree_is_cleared(STM_PSEGMENT->callbacks_on_commit_and_abort[0]));
+ assert(tree_is_cleared(STM_PSEGMENT->callbacks_on_commit_and_abort[1]));
assert(STM_PSEGMENT->objects_pointing_to_nursery == NULL);
assert(STM_PSEGMENT->large_overflow_objects == NULL);
#ifndef NDEBUG
@@ -256,6 +391,24 @@
check_nursery_at_transaction_start();
}
+long stm_start_transaction(stm_thread_local_t *tl)
+{
+ s_mutex_lock();
+#ifdef STM_NO_AUTOMATIC_SETJMP
+ long repeat_count = 0; /* test/support.py */
+#else
+ long repeat_count = stm_rewind_jmp_setjmp(tl);
+#endif
+ _stm_start_transaction(tl, false);
+ return repeat_count;
+}
+
+void stm_start_inevitable_transaction(stm_thread_local_t *tl)
+{
+ s_mutex_lock();
+ _stm_start_transaction(tl, true);
+}
+
/************************************************************/
@@ -286,13 +439,17 @@
({
if (was_read_remote(remote_base, item, remote_version)) {
/* A write-read conflict! */
- write_read_contention_management(i, item);
-
- /* If we reach this point, we didn't abort, but maybe we
- had to wait for the other thread to commit. If we
- did, then we have to restart committing from our call
- to synchronize_all_threads(). */
- return true;
+ dprintf(("write-read conflict on %p, our seg: %d, other: %ld\n",
+ item, STM_SEGMENT->segment_num, i));
+ if (write_read_contention_management(i, item)) {
+ /* If we reach this point, we didn't abort, but we
+ had to wait for the other thread to commit. If we
+ did, then we have to restart committing from our call
+ to synchronize_all_threads(). */
+ return true;
+ }
+ /* we aborted the other transaction without waiting, so
+ we can just continue */
}
}));
}
@@ -309,16 +466,16 @@
*/
assert(!_is_young(obj));
+ char *segment_base = get_segment_base(source_segment_num);
uintptr_t start = (uintptr_t)obj;
uintptr_t first_page = start / 4096UL;
+ struct object_s *realobj = (struct object_s *)
+ REAL_ADDRESS(segment_base, obj);
if (is_small_uniform(obj)) {
abort();//XXX WRITE THE FAST CASE
}
else {
- char *segment_base = get_segment_base(source_segment_num);
- struct object_s *realobj = (struct object_s *)
- REAL_ADDRESS(segment_base, obj);
ssize_t obj_size = stmcb_size_rounded_up(realobj);
assert(obj_size >= 16);
uintptr_t end = start + obj_size;
@@ -465,6 +622,237 @@
} while (j > 0);
}
+static void _page_wise_synchronize_object_now_default(object_t *obj)
+{
+ uintptr_t start = (uintptr_t)obj;
+ uintptr_t first_page = start / 4096UL;
+
+ char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+ ssize_t obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
+ assert(obj_size >= 16);
+ uintptr_t end = start + obj_size;
+ uintptr_t last_page = (end - 1) / 4096UL;
+ long i, myself = STM_SEGMENT->segment_num;
+
+ for (; first_page <= last_page; first_page++) {
+
+ uintptr_t copy_size;
+ if (first_page == last_page) {
+ /* this is the final fragment */
+ copy_size = end - start;
+ }
+ else {
+ /* this is a non-final fragment, going up to the
+ page's end */
+ copy_size = 4096 - (start & 4095);
+ }
+ /* double-check that the result fits in one page */
+ assert(copy_size > 0);
+ assert(copy_size + (start & 4095) <= 4096);
+
+ /* First copy the object into the shared page, if needed */
+ char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start);
+ char *dst = REAL_ADDRESS(stm_object_pages, start);
+ if (is_private_page(myself, first_page)) {
+ if (copy_size == 4096)
+ pagecopy(dst, src);
+ else
+ memcpy(dst, src, copy_size);
+ }
+ else {
+ assert(memcmp(dst, src, copy_size) == 0); /* same page */
+ }
+
+ for (i = 1; i <= NB_SEGMENTS; i++) {
+ if (i == myself)
+ continue;
+
+ /* src = REAL_ADDRESS(stm_object_pages, start); */
+ dst = REAL_ADDRESS(get_segment_base(i), start);
+ if (is_private_page(i, first_page)) {
+ /* The page is a private page. We need to diffuse this
+ fragment of object from the shared page to this private
+ page. */
+ if (copy_size == 4096)
+ pagecopy(dst, src);
+ else
+ memcpy(dst, src, copy_size);
+ }
+ else {
+ assert(!memcmp(dst, src, copy_size)); /* same page */
+ }
+ }
+
+ start = (start + 4096) & ~4095;
+ }
+}
+
+static inline bool _has_private_page_in_range(
+ long seg_num, uintptr_t start, uintptr_t size)
+{
+ uintptr_t first_page = start / 4096UL;
+ uintptr_t last_page = (start + size) / 4096UL;
+ for (; first_page <= last_page; first_page++)
+ if (is_private_page(seg_num, first_page))
+ return true;
+ return false;
+}
+
+static void _card_wise_synchronize_object_now_default(object_t *obj)
+{
+ assert(obj_should_use_cards(obj));
+ assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+ assert(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj));
+
+ uintptr_t offset_itemsize[2];
+ struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
+ size_t obj_size = stmcb_size_rounded_up(realobj);
+ assert(obj_size >= 32);
+ stmcb_get_card_base_itemsize(realobj, offset_itemsize);
+ size_t real_idx_count = (obj_size - offset_itemsize[0]) / offset_itemsize[1];
+
+ uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj);
+ uintptr_t card_index = 1;
+ uintptr_t last_card_index = get_index_to_card_index(real_idx_count - 1); /* max valid index */
+ long i, myself = STM_SEGMENT->segment_num;
+
+ /* simple heuristic to check if probably the whole object is
+ marked anyway so we should do page-wise synchronize */
+ if (write_locks[first_card_index + 1] == CARD_MARKED_OLD
+ && write_locks[first_card_index + last_card_index] == CARD_MARKED_OLD
+ && write_locks[first_card_index + (last_card_index >> 1) + 1] == CARD_MARKED_OLD) {
+
+ dprintf(("card_wise_sync assumes %p,size:%lu is fully marked\n", obj, obj_size));
+ _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num),
+ obj, CARD_CLEAR, false);
+ _page_wise_synchronize_object_now(obj);
+ return;
+ }
+
+ dprintf(("card_wise_sync syncs %p,size:%lu card-wise\n", obj, obj_size));
+
+ /* Combine multiple marked cards and do a memcpy for them. We don't
+ try yet to use page_copy() or otherwise take into account privatization
+ of pages (except _has_private_page_in_range) */
+ bool all_cards_were_cleared = true;
+
+ uintptr_t start_card_index = -1;
+ while (card_index <= last_card_index) {
+ uintptr_t card_lock_idx = first_card_index + card_index;
+ uint8_t card_value = write_locks[card_lock_idx];
+
+ if (card_value == CARD_MARKED_OLD) {
+ write_locks[card_lock_idx] = CARD_CLEAR;
+
+ if (start_card_index == -1) { /* first marked card */
+ start_card_index = card_index;
+ /* start = (uintptr_t)obj + stmcb_index_to_byte_offset( */
+ /* realobj, get_card_index_to_index(card_index)); */
+ if (all_cards_were_cleared) {
+ all_cards_were_cleared = false;
+ }
+ }
+ }
+ else {
+ OPT_ASSERT(card_value == CARD_CLEAR);
+ }
+
+ if (start_card_index != -1 /* something to copy */
+ && (card_value != CARD_MARKED_OLD /* found non-marked card */
+ || card_index == last_card_index)) { /* this is the last card */
+ /* do the copying: */
+ uintptr_t start, copy_size;
+ uintptr_t next_card_offset;
+ uintptr_t start_card_offset;
+ uintptr_t next_card_index = card_index;
+
+ if (card_value == CARD_MARKED_OLD) {
+ /* card_index is the last card of the object, but we need
+ to go one further to get the right offset */
+ next_card_index++;
+ }
+
+ start_card_offset = offset_itemsize[0] +
+ get_card_index_to_index(start_card_index) * offset_itemsize[1];
+
+ next_card_offset = offset_itemsize[0] +
+ get_card_index_to_index(next_card_index) * offset_itemsize[1];
+
+ if (next_card_offset > obj_size)
+ next_card_offset = obj_size;
+
+ start = (uintptr_t)obj + start_card_offset;
+ copy_size = next_card_offset - start_card_offset;
+ OPT_ASSERT(copy_size > 0);
+
+ /* dprintf(("copy %lu bytes\n", copy_size)); */
+
+ /* since we have marked cards, at least one page here must be private */
+ assert(_has_private_page_in_range(myself, start, copy_size));
+
+ /* copy to shared segment: */
+ char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start);
+ char *dst = REAL_ADDRESS(stm_object_pages, start);
+ memcpy(dst, src, copy_size);
+
+ /* copy to other segments */
+ for (i = 1; i <= NB_SEGMENTS; i++) {
+ if (i == myself)
+ continue;
+ if (!_has_private_page_in_range(i, start, copy_size))
+ continue;
+ /* src = REAL_ADDRESS(stm_object_pages, start); */
+ dst = REAL_ADDRESS(get_segment_base(i), start);
+ memcpy(dst, src, copy_size);
+ }
+
+ start_card_index = -1;
+ }
+
+ card_index++;
+ }
+
+ if (all_cards_were_cleared) {
+ /* well, seems like we never called stm_write_card() on it, so actually
+ we need to fall back to synchronize the whole object */
+ _page_wise_synchronize_object_now(obj);
+ return;
+ }
+
+#ifndef NDEBUG
+ char *src = REAL_ADDRESS(stm_object_pages, (uintptr_t)obj);
+ char *dst;
+ for (i = 1; i <= NB_SEGMENTS; i++) {
+ dst = REAL_ADDRESS(get_segment_base(i), (uintptr_t)obj);
+ assert(memcmp(dst, src, obj_size) == 0);
+ }
+#endif
+}
+
+static void synchronize_object_now_default(object_t *obj, bool ignore_cards)
+{
+ /* Copy around the version of 'obj' that lives in our own segment.
+ It is first copied into the shared pages, and then into other
+ segments' own private pages.
+
+ Must be called with the privatization lock acquired.
+ */
+ assert(!_is_young(obj));
+ assert(obj->stm_flags & GCFLAG_WRITE_BARRIER);
+ assert(STM_PSEGMENT->privatization_lock == 1);
+
+ if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) {
+ assert(!(obj->stm_flags & GCFLAG_CARDS_SET));
+ abort();//XXX WRITE THE FAST CASE
+ } else if (ignore_cards || !obj_should_use_cards(obj)) {
+ _page_wise_synchronize_object_now(obj);
+ } else {
+ _card_wise_synchronize_object_now(obj);
+ }
+
+ _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), obj);
+}
+
static void push_overflow_objects_from_privatized_pages(void)
{
if (STM_PSEGMENT->large_overflow_objects == NULL)
@@ -472,7 +860,7 @@
acquire_privatization_lock();
LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *,
- synchronize_object_enqueue(item));
+ synchronize_object_enqueue(item, true /*ignore_cards*/));
synchronize_objects_flush();
release_privatization_lock();
}
@@ -497,7 +885,7 @@
/* copy the object to the shared page, and to the other
private pages as needed */
- synchronize_object_enqueue(item);
+ synchronize_object_enqueue(item, false); /* don't ignore_cards */
}));
release_privatization_lock();
@@ -511,8 +899,13 @@
STM_PSEGMENT->safe_point = SP_NO_TRANSACTION;
STM_PSEGMENT->transaction_state = TS_NONE;
+ /* marker_inev is not needed anymore */
+ STM_PSEGMENT->marker_inev[1] = 0;
+
/* reset these lists to NULL for the next transaction */
+ _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num));
LIST_FREE(STM_PSEGMENT->objects_pointing_to_nursery);
+ list_clear(STM_PSEGMENT->old_objects_with_cards);
LIST_FREE(STM_PSEGMENT->large_overflow_objects);
timing_end_transaction(attribute_to);
@@ -533,6 +926,9 @@
/* the call to minor_collection() above leaves us with
STM_TIME_BOOKKEEPING */
+ /* synchronize overflow objects living in privatized pages */
+ push_overflow_objects_from_privatized_pages();
+
s_mutex_lock();
restart:
@@ -549,31 +945,29 @@
dprintf(("commit_transaction\n"));
assert(STM_SEGMENT->nursery_end == NURSERY_END);
- STM_SEGMENT->jmpbuf_ptr = NULL;
+ stm_rewind_jmp_forget(STM_SEGMENT->running_thread);
/* if a major collection is required, do it here */
- if (is_major_collection_requested())
+ if (is_major_collection_requested()) {
+ int oldstate = change_timing_state(STM_TIME_MAJOR_GC);
major_collection_now_at_safe_point();
-
- /* synchronize overflow objects living in privatized pages */
- push_overflow_objects_from_privatized_pages();
+ change_timing_state(oldstate);
+ }
/* synchronize modified old objects to other threads */
push_modified_to_other_segments();
+ _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num));
/* update 'overflow_number' if needed */
if (STM_PSEGMENT->overflow_number_has_been_used) {
highest_overflow_number += GCFLAG_OVERFLOW_NUMBER_bit0;
- /* Note that the overflow number cannot be entirely 1 bits;
- this prevents stm_flags from ever containing the value -1,
- which might be confused with GCWORD_MOVED. */
assert(highest_overflow_number != /* XXX else, overflow! */
(uint32_t)-GCFLAG_OVERFLOW_NUMBER_bit0);
STM_PSEGMENT->overflow_number = highest_overflow_number;
STM_PSEGMENT->overflow_number_has_been_used = false;
}
- clear_callbacks_on_abort();
+ invoke_and_clear_user_callbacks(0); /* for commit */
/* send what is hopefully the correct signals */
if (STM_PSEGMENT->transaction_state == TS_INEVITABLE) {
@@ -622,11 +1016,14 @@
ssize_t size = stmcb_size_rounded_up((struct object_s *)src);
memcpy(dst, src, size);
+ if (obj_should_use_cards(item))
+ _reset_object_cards(pseg, item, CARD_CLEAR, false);
+
/* objects in 'modified_old_objects' usually have the
WRITE_BARRIER flag, unless they have been modified
recently. Ignore the old flag; after copying from the
other segment, we should have the flag. */
- assert(item->stm_flags & GCFLAG_WRITE_BARRIER);
+ assert(((struct object_s *)dst)->stm_flags & GCFLAG_WRITE_BARRIER);
/* write all changes to the object before we release the
write lock below. This is needed because we need to
@@ -650,6 +1047,10 @@
static void abort_data_structures_from_segment_num(int segment_num)
{
+#pragma push_macro("STM_PSEGMENT")
+#pragma push_macro("STM_SEGMENT")
+#undef STM_PSEGMENT
+#undef STM_SEGMENT
/* This function clears the content of the given segment undergoing
an abort. It is called from abort_with_mutex(), but also sometimes
from other threads that figure out that this segment should abort.
@@ -677,25 +1078,74 @@
/* throw away the content of the nursery */
long bytes_in_nursery = throw_away_nursery(pseg);
+ /* modified_old_objects' cards get cleared in
+ reset_modified_from_other_segments. Objs in old_objs_with_cards but not
+ in modified_old_objs are overflow objects and handled here: */
+ if (pseg->large_overflow_objects != NULL) {
+ /* some overflow objects may have cards when aborting, clear them too */
+ LIST_FOREACH_R(pseg->large_overflow_objects, object_t * /*item*/,
+ {
+ struct object_s *realobj = (struct object_s *)
+ REAL_ADDRESS(pseg->pub.segment_base, item);
+
+ if (realobj->stm_flags & GCFLAG_CARDS_SET) {
+ /* CARDS_SET is enough since other HAS_CARDS objs
+ are already cleared */
+ _reset_object_cards(pseg, item, CARD_CLEAR, false);
+ }
+ });
+ }
+
/* reset all the modified objects (incl. re-adding GCFLAG_WRITE_BARRIER) */
reset_modified_from_other_segments(segment_num);
+ _verify_cards_cleared_in_all_lists(pseg);
- /* reset the tl->shadowstack and thread_local_obj to their original
- value before the transaction start */
+ /* reset tl->shadowstack and thread_local_obj to their original
+ value before the transaction start. Also restore the content
+ of the shadowstack here. */
stm_thread_local_t *tl = pseg->pub.running_thread;
+#ifdef STM_NO_AUTOMATIC_SETJMP
+ /* In tests, we don't save and restore the shadowstack correctly.
+ Be sure to not change items below shadowstack_at_start_of_transaction.
+ There is no such restrictions in non-Python-based tests. */
assert(tl->shadowstack >= pseg->shadowstack_at_start_of_transaction);
- pseg->shadowstack_at_abort = tl->shadowstack;
tl->shadowstack = pseg->shadowstack_at_start_of_transaction;
+#else
+ /* NB. careful, this function might be called more than once to
+ abort a given segment. Make sure that
+ stm_rewind_jmp_restore_shadowstack() is idempotent. */
+ /* we need to do this here and not directly in rewind_longjmp() because
+ that is called when we already released everything (safe point)
+ and a concurrent major GC could mess things up. */
+ if (tl->shadowstack != NULL)
+ stm_rewind_jmp_restore_shadowstack(tl);
+ assert(tl->shadowstack == pseg->shadowstack_at_start_of_transaction);
+#endif
tl->thread_local_obj = pseg->threadlocal_at_start_of_transaction;
tl->last_abort__bytes_in_nursery = bytes_in_nursery;
/* reset these lists to NULL too on abort */
LIST_FREE(pseg->objects_pointing_to_nursery);
+ list_clear(pseg->old_objects_with_cards);
LIST_FREE(pseg->large_overflow_objects);
list_clear(pseg->young_weakrefs);
+#pragma pop_macro("STM_SEGMENT")
+#pragma pop_macro("STM_PSEGMENT")
}
-static void abort_with_mutex(void)
+#ifdef STM_NO_AUTOMATIC_SETJMP
+void _test_run_abort(stm_thread_local_t *tl) __attribute__((noreturn));
+int stm_is_inevitable(void)
+{
+ switch (STM_PSEGMENT->transaction_state) {
+ case TS_REGULAR: return 0;
+ case TS_INEVITABLE: return 1;
+ default: abort();
+ }
+}
+#endif
+
+static stm_thread_local_t *abort_with_mutex_no_longjmp(void)
{
assert(_has_mutex());
dprintf(("~~~ ABORT\n"));
@@ -704,15 +1154,14 @@
abort_data_structures_from_segment_num(STM_SEGMENT->segment_num);
- stm_jmpbuf_t *jmpbuf_ptr = STM_SEGMENT->jmpbuf_ptr;
+ stm_thread_local_t *tl = STM_SEGMENT->running_thread;
/* clear memory registered on the thread-local */
- stm_thread_local_t *tl = STM_SEGMENT->running_thread;
if (tl->mem_clear_on_abort)
memset(tl->mem_clear_on_abort, 0, tl->mem_bytes_to_clear_on_abort);
/* invoke the callbacks */
- invoke_and_clear_callbacks_on_abort();
+ invoke_and_clear_user_callbacks(1); /* for abort */
int attribute_to = STM_TIME_RUN_ABORTED_OTHER;
@@ -729,6 +1178,12 @@
/* Broadcast C_ABORTED to wake up contention.c */
cond_broadcast(C_ABORTED);
+ return tl;
+}
+
+static void abort_with_mutex(void)
+{
+ stm_thread_local_t *tl = abort_with_mutex_no_longjmp();
s_mutex_unlock();
/* It seems to be a good idea, at least in some examples, to sleep
@@ -743,9 +1198,12 @@
*/
usleep(1);
- assert(jmpbuf_ptr != NULL);
- assert(jmpbuf_ptr != (stm_jmpbuf_t *)-1); /* for tests only */
- __builtin_longjmp(*jmpbuf_ptr, 1);
+#ifdef STM_NO_AUTOMATIC_SETJMP
+ _test_run_abort(tl);
+#else
+ s_mutex_lock();
+ stm_rewind_jmp_longjmp(tl);
+#endif
}
void _stm_become_inevitable(const char *msg)
@@ -759,12 +1217,11 @@
marker_fetch_inev();
wait_for_end_of_inevitable_transaction(NULL);
STM_PSEGMENT->transaction_state = TS_INEVITABLE;
- STM_SEGMENT->jmpbuf_ptr = NULL;
- clear_callbacks_on_abort();
+ stm_rewind_jmp_forget(STM_SEGMENT->running_thread);
+ invoke_and_clear_user_callbacks(0); /* for commit */
}
else {
assert(STM_PSEGMENT->transaction_state == TS_INEVITABLE);
- assert(STM_SEGMENT->jmpbuf_ptr == NULL);
}
s_mutex_unlock();
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -14,7 +14,7 @@
#endif
-#define NB_PAGES (1500*256) // 1500MB
+#define NB_PAGES (2500*256) // 2500MB
#define NB_SEGMENTS STM_NB_SEGMENTS
#define NB_SEGMENTS_MAX 240 /* don't increase NB_SEGMENTS past this */
#define MAP_PAGES_FLAGS (MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE)
@@ -35,6 +35,8 @@
#define WRITELOCK_START ((END_NURSERY_PAGE * 4096UL) >> 4)
#define WRITELOCK_END READMARKER_END
+#define CARD_SIZE _STM_CARD_SIZE
+
enum /* stm_flags */ {
/* This flag is set on non-nursery objects. It forces stm_write()
to call _stm_write_slowpath().
@@ -47,7 +49,13 @@
The same flag is abused to mark prebuilt objects whose hash has
been taken during translation and is statically recorded just
after the object. */
- GCFLAG_HAS_SHADOW = 0x2,
+ GCFLAG_HAS_SHADOW = 0x02,
+
+ /* Set on objects that are large enough (_STM_MIN_CARD_OBJ_SIZE)
+ to have multiple cards (at least _STM_MIN_CARD_COUNT), and that
+ have at least one card marked. This flag implies
+ GCFLAG_WRITE_BARRIER. */
+ GCFLAG_CARDS_SET = _STM_GCFLAG_CARDS_SET,
/* All remaining bits of the 32-bit 'stm_flags' field are taken by
the "overflow number". This is a number that identifies the
@@ -56,7 +64,7 @@
current transaction that have been flushed out of the nursery,
which occurs if the same transaction allocates too many objects.
*/
- GCFLAG_OVERFLOW_NUMBER_bit0 = 0x4 /* must be last */
+ GCFLAG_OVERFLOW_NUMBER_bit0 = 0x8 /* must be last */
};
#define SYNC_QUEUE_SIZE 31
@@ -75,9 +83,7 @@
/* List of old objects (older than the current transaction) that the
current transaction attempts to modify. This is used to track
the STM status: they are old objects that where written to and
- that need to be copied to other segments upon commit. Note that
- every object takes three list items: the object, and two words for
- the location marker. */
+ that need to be copied to other segments upon commit. */
struct list_s *modified_old_objects;
/* For each entry in 'modified_old_objects', we have two entries
@@ -95,6 +101,10 @@
understood as meaning implicitly "this is the same as
'modified_old_objects'". */
struct list_s *objects_pointing_to_nursery;
+ /* Like objects_pointing_to_nursery it holds the old objects that
+ we did a stm_write_card() on. Objects can be in both lists.
+ It is NULL iff objects_pointing_to_nursery is NULL. */
+ struct list_s *old_objects_with_cards;
/* List of all large, overflowed objects. Only non-NULL after the
current transaction spanned a minor collection. */
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit