Author: Maciej Fijalkowski <fij...@gmail.com> Branch: Changeset: r76802:86cfdf3d2620 Date: 2015-04-16 10:17 +0200 http://bitbucket.org/pypy/pypy/changeset/86cfdf3d2620/
Log: merge vmprof diff too long, truncating to 2000 out of 2558 lines diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -9,7 +9,6 @@ from rpython.rlib.signature import signature from rpython.rlib.rarithmetic import r_uint, SHRT_MIN, SHRT_MAX, \ INT_MIN, INT_MAX, UINT_MAX, USHRT_MAX -from rpython.rlib.rweaklist import RWeakListMixin from pypy.interpreter.executioncontext import (ExecutionContext, ActionFlag, UserDelAction) @@ -367,10 +366,6 @@ # ____________________________________________________________ -class CodeObjWeakList(RWeakListMixin): - def __init__(self): - self.initialize() - class ObjSpace(object): """Base class for the interpreter-level implementations of object spaces. http://pypy.readthedocs.org/en/latest/objspace.html""" @@ -394,7 +389,6 @@ self.check_signal_action = None # changed by the signal module self.user_del_action = UserDelAction(self) self._code_of_sys_exc_info = None - self.all_code_objs = CodeObjWeakList() # can be overridden to a subclass self.initialize() @@ -672,16 +666,16 @@ assert ec is not None return ec + def register_code_callback(self, callback): + ec = self.getexecutioncontext() + ec._code_callback = callback + def register_code_object(self, pycode): - callback = self.getexecutioncontext().register_code_callback - if callback is not None: - callback(self, pycode) - self.all_code_objs.add_handle(pycode) - - def set_code_callback(self, callback): ec = self.getexecutioncontext() - ec.register_code_callback = callback - + if ec._code_callback is None: + return + ec._code_callback(self, pycode) + def _freeze_(self): return True diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -2,6 +2,7 @@ from pypy.interpreter.error import OperationError, get_cleared_operation_error from rpython.rlib.unroll 
import unrolling_iterable from rpython.rlib import jit +from rpython.rlib.objectmodel import we_are_translated TICK_COUNTER_STEP = 100 @@ -33,11 +34,16 @@ self.profilefunc = None self.w_profilefuncarg = None self.thread_disappeared = False # might be set to True after os.fork() - self.register_code_callback = None + if sys.maxint == 2147483647: self._code_unique_id = 0 # XXX this is wrong, it won't work on 32bit else: - self._code_unique_id = 0x7000000000000000 + if we_are_translated(): + self._code_unique_id = 0x7000000000000000 + else: + self._code_unique_id = 0x7700000000000000 + # should be enough code objects + self._code_callback = None @staticmethod def _mark_thread_disappeared(space): diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py --- a/pypy/interpreter/pycode.py +++ b/pypy/interpreter/pycode.py @@ -129,7 +129,7 @@ ec = self.space.getexecutioncontext() self._unique_id = ec._code_unique_id - ec._code_unique_id += 2 # so we have one bit that we can mark stuff + ec._code_unique_id += 4 # so we have two bits that we can mark stuff # with def _get_full_name(self): diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py --- a/pypy/module/_vmprof/interp_vmprof.py +++ b/pypy/module/_vmprof/interp_vmprof.py @@ -3,13 +3,15 @@ from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.annlowlevel import cast_instance_to_gcref, cast_base_ptr_to_instance from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib import jit, rposix, entrypoint +from rpython.rlib import jit, rposix, rgc +from rpython.rlib.rarithmetic import ovfcheck_float_to_int from rpython.rtyper.tool import rffi_platform as platform from rpython.rlib.rstring import StringBuilder from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import oefmt, wrap_oserror, OperationError from pypy.interpreter.gateway import unwrap_spec from pypy.interpreter.pyframe import PyFrame +from 
pypy.interpreter.pycode import PyCode ROOT = py.path.local(__file__).join('..') SRC = ROOT.join('src') @@ -28,14 +30,13 @@ libraries = ['unwind'], post_include_bits=[""" - void* pypy_vmprof_get_virtual_ip(void*); void pypy_vmprof_init(void); """], separate_module_sources=[""" void pypy_vmprof_init(void) { vmprof_set_mainloop(pypy_execute_frame_trampoline, 0, - pypy_vmprof_get_virtual_ip); + NULL); } """], ) @@ -56,7 +57,7 @@ pypy_execute_frame_trampoline = rffi.llexternal( "pypy_execute_frame_trampoline", - [llmemory.GCREF, llmemory.GCREF, llmemory.GCREF], + [llmemory.GCREF, llmemory.GCREF, llmemory.GCREF, lltype.Signed], llmemory.GCREF, compilation_info=eci, _nowrapper=True, sandboxsafe=True, @@ -96,23 +97,15 @@ gc_frame = cast_instance_to_gcref(frame) gc_inputvalue = cast_instance_to_gcref(w_inputvalue) gc_operr = cast_instance_to_gcref(operr) - gc_result = pypy_execute_frame_trampoline(gc_frame, gc_inputvalue, gc_operr) + assert frame.pycode._unique_id & 3 == 0 + unique_id = frame.pycode._unique_id | 1 + gc_result = pypy_execute_frame_trampoline(gc_frame, gc_inputvalue, + gc_operr, unique_id) return cast_base_ptr_to_instance(W_Root, gc_result) else: return original_execute_frame(frame, w_inputvalue, operr) -@entrypoint.entrypoint_lowlevel('main', [llmemory.GCREF], - 'pypy_vmprof_get_virtual_ip', True) -def get_virtual_ip(gc_frame): - frame = cast_base_ptr_to_instance(PyFrame, gc_frame) - if jit._get_virtualizable_token(frame): - return rffi.cast(rffi.VOIDP, 0) - virtual_ip = do_get_virtual_ip(frame) - return rffi.cast(rffi.VOIDP, virtual_ip) - -def do_get_virtual_ip(frame): - return frame.pycode._unique_id def write_long_to_string_builder(l, b): if sys.maxint == 2147483647: @@ -130,31 +123,33 @@ b.append(chr((l >> 48) & 0xff)) b.append(chr((l >> 56) & 0xff)) +def try_cast_to_pycode(gcref): + return rgc.try_cast_gcref_to_instance(PyCode, gcref) + +MAX_CODES = 1000 + class VMProf(object): def __init__(self): self.is_enabled = False self.ever_enabled = False - 
self.mapping_so_far = [] # stored mapping in between runs self.fileno = -1 + self.current_codes = [] - def enable(self, space, fileno, period): + def enable(self, space, fileno, period_usec): if self.is_enabled: raise oefmt(space.w_ValueError, "_vmprof already enabled") self.fileno = fileno self.is_enabled = True - self.write_header(fileno, period) + self.write_header(fileno, period_usec) if not self.ever_enabled: if we_are_translated(): pypy_vmprof_init() self.ever_enabled = True - for weakcode in space.all_code_objs.get_all_handles(): - code = weakcode() - if code: - self.register_code(space, code) - space.set_code_callback(vmprof_register_code) + self.gather_all_code_objs(space) + space.register_code_callback(vmprof_register_code) if we_are_translated(): # does not work untranslated - res = vmprof_enable(fileno, period, 0, + res = vmprof_enable(fileno, period_usec, 0, lltype.nullptr(rffi.CCHARP.TO), 0) else: res = 0 @@ -162,42 +157,55 @@ raise wrap_oserror(space, OSError(rposix.get_saved_errno(), "_vmprof.enable")) - def write_header(self, fileno, period): - if period == -1: - period_usec = 1000000 / 100 # 100hz - else: - period_usec = period + def gather_all_code_objs(self, space): + all_code_objs = rgc.do_get_objects(try_cast_to_pycode) + for code in all_code_objs: + self.register_code(space, code) + + def write_header(self, fileno, period_usec): + assert period_usec > 0 b = StringBuilder() write_long_to_string_builder(0, b) write_long_to_string_builder(3, b) write_long_to_string_builder(0, b) write_long_to_string_builder(period_usec, b) write_long_to_string_builder(0, b) + b.append('\x04') # interp name + b.append(chr(len('pypy'))) + b.append('pypy') os.write(fileno, b.build()) def register_code(self, space, code): if self.fileno == -1: raise OperationError(space.w_RuntimeError, space.wrap("vmprof not running")) - name = code._get_full_name() + self.current_codes.append(code) + if len(self.current_codes) >= MAX_CODES: + self._flush_codes(space) + + def 
_flush_codes(self, space): b = StringBuilder() - b.append('\x02') - write_long_to_string_builder(code._unique_id, b) - write_long_to_string_builder(len(name), b) - b.append(name) + for code in self.current_codes: + name = code._get_full_name() + b.append('\x02') + write_long_to_string_builder(code._unique_id, b) + write_long_to_string_builder(len(name), b) + b.append(name) os.write(self.fileno, b.build()) + self.current_codes = [] def disable(self, space): if not self.is_enabled: raise oefmt(space.w_ValueError, "_vmprof not enabled") self.is_enabled = False + space.register_code_callback(None) + self._flush_codes(space) self.fileno = -1 if we_are_translated(): # does not work untranslated res = vmprof_disable() else: res = 0 - space.set_code_callback(None) if res == -1: raise wrap_oserror(space, OSError(rposix.get_saved_errno(), "_vmprof.disable")) @@ -207,13 +215,23 @@ mod_vmprof = space.getbuiltinmodule('_vmprof') assert isinstance(mod_vmprof, Module) mod_vmprof.vmprof.register_code(space, code) - -@unwrap_spec(fileno=int, period=int) -def enable(space, fileno, period=-1): + +@unwrap_spec(fileno=int, period=float) +def enable(space, fileno, period=0.01): # default 100 Hz from pypy.module._vmprof import Module mod_vmprof = space.getbuiltinmodule('_vmprof') assert isinstance(mod_vmprof, Module) - mod_vmprof.vmprof.enable(space, fileno, period) + # + try: + period_usec = ovfcheck_float_to_int(period * 1000000.0 + 0.5) + if period_usec <= 0 or period_usec >= 1e6: + # we don't want seconds here at all + raise ValueError + except (ValueError, OverflowError): + raise OperationError(space.w_ValueError, + space.wrap("'period' too large or non positive")) + # + mod_vmprof.vmprof.enable(space, fileno, period_usec) def disable(space): from pypy.module._vmprof import Module diff --git a/pypy/module/_vmprof/src/fake_pypy_api.c b/pypy/module/_vmprof/src/fake_pypy_api.c --- a/pypy/module/_vmprof/src/fake_pypy_api.c +++ b/pypy/module/_vmprof/src/fake_pypy_api.c @@ -1,25 +1,15 @@ 
- -long pypy_jit_start_addr(void) -{ - return 3; -} - -long pypy_jit_end_addr(void) -{ - return 3; -} long pypy_jit_stack_depth_at_loc(long x) { return 0; } -long pypy_find_codemap_at_addr(long x) +void *pypy_find_codemap_at_addr(long x) { - return 0; + return (void *)0; } -long pypy_yield_codemap_at_addr(long x, long y, long *a) +long pypy_yield_codemap_at_addr(void *x, long y, long *a) { return 0; } @@ -27,3 +17,5 @@ void pypy_pyframe_execute_frame(void) { } + +volatile int pypy_codemap_currently_invalid = 0; diff --git a/pypy/module/_vmprof/src/get_custom_offset.c b/pypy/module/_vmprof/src/get_custom_offset.c --- a/pypy/module/_vmprof/src/get_custom_offset.c +++ b/pypy/module/_vmprof/src/get_custom_offset.c @@ -1,46 +1,65 @@ -long pypy_jit_start_addr(); -long pypy_jit_end_addr(); -long pypy_jit_stack_depth_at_loc(long); -long pypy_find_codemap_at_addr(long); -long pypy_yield_codemap_at_addr(long, long, long*); +extern volatile int pypy_codemap_currently_invalid; + +void *pypy_find_codemap_at_addr(long addr, long *start_addr); +long pypy_yield_codemap_at_addr(void *codemap_raw, long addr, + long *current_pos_addr); +long pypy_jit_stack_depth_at_loc(long loc); + void vmprof_set_tramp_range(void* start, void* end) { } -static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, unw_cursor_t *cp) { - intptr_t ip_l = (intptr_t)ip; +int custom_sanity_check() +{ + return !pypy_codemap_currently_invalid; +} - if (ip_l < pypy_jit_start_addr() || ip_l > pypy_jit_end_addr()) { - return -1; - } - return (void*)pypy_jit_stack_depth_at_loc(ip_l); +static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, void *cp) { + intptr_t ip_l = (intptr_t)ip; + return pypy_jit_stack_depth_at_loc(ip_l); } static long vmprof_write_header_for_jit_addr(void **result, long n, - void *ip, int max_depth) + void *ip, int max_depth) { - long codemap_pos; - long current_pos = 0; - intptr_t id; - intptr_t addr = (intptr_t)ip; + void *codemap; + long current_pos = 0; + intptr_t id; + long start_addr = 0; 
+ intptr_t addr = (intptr_t)ip; + int start, k; + void *tmp; - if (addr < pypy_jit_start_addr() || addr > pypy_jit_end_addr()) { - return n; - } - codemap_pos = pypy_find_codemap_at_addr(addr); - if (codemap_pos == -1) { - return n; - } - while (1) { - id = pypy_yield_codemap_at_addr(codemap_pos, addr, &current_pos); - if (id == 0) { - return n; - } - result[n++] = id; - if (n >= max_depth) { - return n; - } - } + codemap = pypy_find_codemap_at_addr(addr, &start_addr); + if (codemap == NULL) + // not a jit code at all + return n; + + // modify the last entry to point to start address and not the random one + // in the middle + result[n - 1] = (void*)start_addr; + start = n; + while (n < max_depth) { + id = pypy_yield_codemap_at_addr(codemap, addr, &current_pos); + if (id == 0) + // finish + break; + result[n++] = (void *)id; + } + // we strip the topmost part - the reason is that it's either + // represented in the jitted caller or it's not jitted (we have the + // same function essentially twice + k = 0; + while (k < (n - start) / 2) { + tmp = result[start + k]; + result[start + k] = result[n - k - 1]; + result[n - k - 1] = tmp; + k++; + } + if (n != max_depth) { + n--; + } + return n; } diff --git a/pypy/module/_vmprof/src/trampoline.asmgcc.s b/pypy/module/_vmprof/src/trampoline.asmgcc.s --- a/pypy/module/_vmprof/src/trampoline.asmgcc.s +++ b/pypy/module/_vmprof/src/trampoline.asmgcc.s @@ -6,11 +6,10 @@ .type pypy_execute_frame_trampoline, @function pypy_execute_frame_trampoline: .cfi_startproc - pushq %rdi + pushq %rcx .cfi_def_cfa_offset 16 call pypy_pyframe_execute_frame@PLT - /* GCROOT 0(%rsp) */ - popq %rdi + popq %rcx .cfi_def_cfa_offset 8 ret .cfi_endproc diff --git a/pypy/module/_vmprof/src/trampoline.h b/pypy/module/_vmprof/src/trampoline.h --- a/pypy/module/_vmprof/src/trampoline.h +++ b/pypy/module/_vmprof/src/trampoline.h @@ -1,1 +1,1 @@ -void* pypy_execute_frame_trampoline(void*, void*, void*); +void* pypy_execute_frame_trampoline(void*, void*, void*, long);
diff --git a/pypy/module/_vmprof/src/vmprof.c b/pypy/module/_vmprof/src/vmprof.c --- a/pypy/module/_vmprof/src/vmprof.c +++ b/pypy/module/_vmprof/src/vmprof.c @@ -25,6 +25,8 @@ #include <unistd.h> #include <sys/time.h> #include <sys/types.h> +#include <errno.h> +#include <pthread.h> #define UNW_LOCAL_ONLY #include <libunwind.h> @@ -34,12 +36,18 @@ #define _unused(x) ((void)x) #define MAX_FUNC_NAME 128 -#define MAX_STACK_DEPTH 64 +#define MAX_STACK_DEPTH 1024 +#define BUFFER_SIZE 8192 -static FILE* profile_file = NULL; + +static int profile_file = 0; +static char profile_write_buffer[BUFFER_SIZE]; +static int profile_buffer_position = 0; void* vmprof_mainloop_func; static ptrdiff_t mainloop_sp_offset; static vmprof_get_virtual_ip_t mainloop_get_virtual_ip; +static long last_period_usec = 0; +static int atfork_hook_installed = 0; /* ************************************************************* @@ -51,27 +59,33 @@ #define MARKER_VIRTUAL_IP '\x02' #define MARKER_TRAILER '\x03' -static void prof_word(FILE* f, long x) { - fwrite(&x, sizeof(x), 1, f); +static void prof_word(long x) { + ((long*)(profile_write_buffer + profile_buffer_position))[0] = x; + profile_buffer_position += sizeof(long); } -static void prof_header(FILE* f, long period_usec) { - prof_word(f, 0); - prof_word(f, 3); - prof_word(f, 0); - prof_word(f, period_usec); - prof_word(f, 0); +static void prof_header(long period_usec) { + // XXX never used here? 
+ prof_word(0); + prof_word(3); + prof_word(0); + prof_word(period_usec); + prof_word(0); + write(profile_file, profile_write_buffer, profile_buffer_position); + profile_buffer_position = 0; } -static void prof_write_stacktrace(FILE* f, void** stack, int depth, int count) { +static void prof_write_stacktrace(void** stack, int depth, int count) { int i; char marker = MARKER_STACKTRACE; - fwrite(&marker, 1, 1, f); - prof_word(f, count); - prof_word(f, depth); + profile_write_buffer[profile_buffer_position++] = MARKER_STACKTRACE; + prof_word(count); + prof_word(depth); for(i=0; i<depth; i++) - prof_word(f, (long)stack[i]); + prof_word((long)stack[i]); + write(profile_file, profile_write_buffer, profile_buffer_position); + profile_buffer_position = 0; } @@ -90,12 +104,17 @@ void* _unused3[sizeof(unw_cursor_t)/sizeof(void*) - 4]; } vmprof_hacked_unw_cursor_t; -static int vmprof_unw_step(unw_cursor_t *cp) { +static int vmprof_unw_step(unw_cursor_t *cp, int first_run) { void* ip; void* sp; ptrdiff_t sp_offset; unw_get_reg (cp, UNW_REG_IP, (unw_word_t*)&ip); unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp); + if (!first_run) + // make sure we're pointing to the CALL and not to the first + // instruction after. If the callee adjusts the stack for us + // it's not safe to be at the instruction after + ip -= 1; sp_offset = vmprof_unw_get_custom_offset(ip, cp); if (sp_offset == -1) { @@ -122,30 +141,30 @@ * ************************************************************* */ -// stolen from pprof: -// Sometimes, we can try to get a stack trace from within a stack -// trace, because libunwind can call mmap (maybe indirectly via an -// internal mmap based memory allocator), and that mmap gets trapped -// and causes a stack-trace request. If were to try to honor that -// recursive request, we'd end up with infinite recursion or deadlock. -// Luckily, it's safe to ignore those subsequent traces. In such -// cases, we return 0 to indicate the situation. 
+// The original code here has a comment, "stolen from pprof", +// about a "__thread int recursive". But general __thread +// variables are not really supposed to be accessed from a +// signal handler. Moreover, we are using SIGPROF, which +// should not be recursively called on the same thread. //static __thread int recursive; -static int recursive; // XXX antocuni: removed __thread int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext) { void *ip; int n = 0; unw_cursor_t cursor; unw_context_t uc = *ucontext; - if (recursive) { + //if (recursive) { + // return 0; + //} + if (!custom_sanity_check()) { return 0; } - ++recursive; + //++recursive; int ret = unw_init_local(&cursor, &uc); assert(ret >= 0); _unused(ret); + int first_run = 1; while (n < max_depth) { if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { @@ -173,16 +192,21 @@ void **arg_ptr = (void**)arg_addr; // fprintf(stderr, "stacktrace mainloop: rsp %p &f2 %p offset %ld\n", // sp, arg_addr, mainloop_sp_offset); - ip = mainloop_get_virtual_ip(*arg_ptr); + if (mainloop_get_virtual_ip) { + ip = mainloop_get_virtual_ip(*arg_ptr); + } else { + ip = *arg_ptr; + } } result[n++] = ip; n = vmprof_write_header_for_jit_addr(result, n, ip, max_depth); - if (vmprof_unw_step(&cursor) <= 0) { + if (vmprof_unw_step(&cursor, first_run) <= 0) { break; } + first_run = 0; } - --recursive; + //--recursive; return n; } @@ -193,10 +217,12 @@ static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) { void* stack[MAX_STACK_DEPTH]; + int saved_errno = errno; stack[0] = GetPC((ucontext_t*)ucontext); int depth = frame_forcer(get_stack_trace(stack+1, MAX_STACK_DEPTH-1, ucontext)); depth++; // To account for pc value in stack[0]; - prof_write_stacktrace(profile_file, stack, depth, 1); + prof_write_stacktrace(stack, depth, 1); + errno = saved_errno; } /* ************************************************************* @@ -209,14 +235,12 @@ if ((fd = dup(fd)) == -1) { return -1; } - 
profile_file = fdopen(fd, "wb"); - if (!profile_file) { - return -1; - } + profile_buffer_position = 0; + profile_file = fd; if (write_header) - prof_header(profile_file, period_usec); + prof_header(period_usec); if (s) - fwrite(s, slen, 1, profile_file); + write(profile_file, s, slen); return 0; } @@ -226,16 +250,16 @@ char buf[BUFSIZ]; size_t size; int marker = MARKER_TRAILER; - fwrite(&marker, 1, 1, profile_file); + write(profile_file, &marker, 1); // copy /proc/PID/maps to the end of the profile file sprintf(buf, "/proc/%d/maps", getpid()); src = fopen(buf, "r"); while ((size = fread(buf, 1, BUFSIZ, src))) { - fwrite(buf, 1, size, profile_file); + write(profile_file, buf, size); } fclose(src); - fclose(profile_file); + close(profile_file); return 0; } @@ -253,15 +277,16 @@ } static int remove_sigprof_handler(void) { - //sighandler_t res = signal(SIGPROF, SIG_DFL); - //if (res == SIG_ERR) { - // return -1; - //} + sighandler_t res = signal(SIGPROF, SIG_DFL); + if (res == SIG_ERR) { + return -1; + } return 0; }; static int install_sigprof_timer(long period_usec) { static struct itimerval timer; + last_period_usec = period_usec; timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = period_usec; timer.it_value = timer.it_interval; @@ -273,15 +298,45 @@ static int remove_sigprof_timer(void) { static struct itimerval timer; + last_period_usec = 0; timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; - timer.it_value = timer.it_interval; + timer.it_value.tv_sec = 0; + timer.it_value.tv_usec = 0; if (setitimer(ITIMER_PROF, &timer, NULL) != 0) { return -1; } return 0; } +static void atfork_disable_timer(void) { + remove_sigprof_timer(); +} + +static void atfork_enable_timer(void) { + install_sigprof_timer(last_period_usec); +} + +static int install_pthread_atfork_hooks(void) { + /* this is needed to prevent the problems described there: + - http://code.google.com/p/gperftools/issues/detail?id=278 + - http://lists.debian.org/debian-glibc/2010/03/msg00161.html 
+ + TL;DR: if the RSS of the process is large enough, the clone() syscall + will be interrupted by the SIGPROF before it can complete, then + retried, interrupted again and so on, in an endless loop. The + solution is to disable the timer around the fork, and re-enable it + only inside the parent. + */ + if (atfork_hook_installed) + return 0; + int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, NULL); + if (ret != 0) + return -1; + atfork_hook_installed = 1; + return 0; +} + /* ************************************************************* * public API * ************************************************************* @@ -297,8 +352,7 @@ int vmprof_enable(int fd, long period_usec, int write_header, char *s, int slen) { - if (period_usec == -1) - period_usec = 1000000 / 100; /* 100hz */ + assert(period_usec > 0); if (open_profile(fd, period_usec, write_header, s, slen) == -1) { return -1; } @@ -308,6 +362,9 @@ if (install_sigprof_timer(period_usec) == -1) { return -1; } + if (install_pthread_atfork_hooks() == -1) { + return -1; + } return 0; } @@ -325,6 +382,7 @@ } void vmprof_register_virtual_function(const char* name, void* start, void* end) { + // XXX unused by pypy // for now *end is simply ignored char buf[1024]; int lgt = strlen(name) + 2 * sizeof(long) + 1; @@ -336,5 +394,5 @@ ((void **)(((void*)buf) + 1))[0] = start; ((long *)(((void*)buf) + 1 + sizeof(long)))[0] = lgt - 2 * sizeof(long) - 1; strncpy(buf + 2 * sizeof(long) + 1, name, 1024 - 2 * sizeof(long) - 1); - fwrite(buf, lgt, 1, profile_file); + write(profile_file, buf, lgt); } diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -21,6 +21,11 @@ i = 0 count = 0 i += 5 * WORD # header + assert s[i] == '\x04' + i += 1 # marker + assert s[i] == '\x04' + i += 1 # length + i += len('pypy') while i < len(s): assert s[i] == '\x02' i += 1 @@ -53,3 +58,11 @@ 
assert "py:foo:" in s assert "py:foo2:" in s assert no_of_codes2 >= no_of_codes + 2 # some extra codes from tests + + def test_enable_ovf(self): + import _vmprof + raises(ValueError, _vmprof.enable, 999, 0) + raises(ValueError, _vmprof.enable, 999, -2.5) + raises(ValueError, _vmprof.enable, 999, 1e300) + raises(ValueError, _vmprof.enable, 999, 1e300 * 1e300) + raises(ValueError, _vmprof.enable, 999, (1e300*1e300) / (1e300*1e300)) diff --git a/pypy/module/_vmprof/test/test_direct.py b/pypy/module/_vmprof/test/test_direct.py new file mode 100644 --- /dev/null +++ b/pypy/module/_vmprof/test/test_direct.py @@ -0,0 +1,66 @@ + +import cffi, py + +srcdir = py.path.local(__file__).join("..", "..", "src") + +ffi = cffi.FFI() +ffi.cdef(""" +long vmprof_write_header_for_jit_addr(void **, long, void*, int); +void *pypy_find_codemap_at_addr(long addr, long *start_addr); +long pypy_yield_codemap_at_addr(void *codemap_raw, long addr, + long *current_pos_addr); +long buffer[]; +""") + +lib = ffi.verify(""" +volatile int pypy_codemap_currently_invalid = 0; + +long buffer[] = {0, 0, 0, 0, 0}; + + + +void *pypy_find_codemap_at_addr(long addr, long *start_addr) +{ + return (void*)buffer; +} + +long pypy_yield_codemap_at_addr(void *codemap_raw, long addr, + long *current_pos_addr) +{ + long c = *current_pos_addr; + if (c >= 5) + return 0; + *current_pos_addr = c + 1; + return *((long*)codemap_raw + c); +} + + +""" + open(str(srcdir.join("get_custom_offset.c"))).read()) + +class TestDirect(object): + def test_infrastructure(self): + cont = ffi.new("long[1]", [0]) + buf = lib.pypy_find_codemap_at_addr(0, cont) + assert buf + cont[0] = 0 + next_addr = lib.pypy_yield_codemap_at_addr(buf, 0, cont) + assert cont[0] == 1 + assert not next_addr + lib.buffer[0] = 13 + cont[0] = 0 + next_addr = lib.pypy_yield_codemap_at_addr(buf, 0, cont) + assert int(ffi.cast("long", next_addr)) == 13 + + def test_write_header_for_jit_addr(self): + lib.buffer[0] = 4 + lib.buffer[1] = 8 + lib.buffer[2] = 12 + 
lib.buffer[3] = 16 + lib.buffer[4] = 0 + buf = ffi.new("long[5]", [0] * 5) + result = ffi.cast("void**", buf) + res = lib.vmprof_write_header_for_jit_addr(result, 0, ffi.NULL, 100) + assert res == 3 + assert buf[0] == 16 + assert buf[1] == 12 + assert buf[2] == 8 diff --git a/pypy/module/gc/referents.py b/pypy/module/gc/referents.py --- a/pypy/module/gc/referents.py +++ b/pypy/module/gc/referents.py @@ -44,30 +44,6 @@ return OperationError(space.w_NotImplementedError, space.wrap("operation not implemented by this GC")) -# ____________________________________________________________ - -def clear_gcflag_extra(fromlist): - pending = fromlist[:] - while pending: - gcref = pending.pop() - if rgc.get_gcflag_extra(gcref): - rgc.toggle_gcflag_extra(gcref) - pending.extend(rgc.get_rpy_referents(gcref)) - -def do_get_objects(): - roots = [gcref for gcref in rgc.get_rpy_roots() if gcref] - pending = roots[:] - result_w = [] - while pending: - gcref = pending.pop() - if not rgc.get_gcflag_extra(gcref): - rgc.toggle_gcflag_extra(gcref) - w_obj = try_cast_gcref_to_w_root(gcref) - if w_obj is not None: - result_w.append(w_obj) - pending.extend(rgc.get_rpy_referents(gcref)) - clear_gcflag_extra(roots) - return result_w # ____________________________________________________________ @@ -116,8 +92,8 @@ break # done. 
Clear flags carefully rgc.toggle_gcflag_extra(gcarg) - clear_gcflag_extra(roots) - clear_gcflag_extra([gcarg]) + rgc.clear_gcflag_extra(roots) + rgc.clear_gcflag_extra([gcarg]) return result_w # ____________________________________________________________ @@ -189,8 +165,7 @@ """Return a list of all app-level objects.""" if not rgc.has_gcflag_extra(): raise missing_operation(space) - result_w = do_get_objects() - rgc.assert_no_more_gcflags() + result_w = rgc.do_get_objects(try_cast_gcref_to_w_root) return space.newlist(result_w) def get_referents(space, args_w): diff --git a/pypy/module/pypyjit/interp_resop.py b/pypy/module/pypyjit/interp_resop.py --- a/pypy/module/pypyjit/interp_resop.py +++ b/pypy/module/pypyjit/interp_resop.py @@ -105,7 +105,7 @@ ofs = ops_offset.get(op, 0) if op.opnum == rop.DEBUG_MERGE_POINT: jd_sd = jitdrivers_sd[op.getarg(0).getint()] - greenkey = op.getarglist()[3:] + greenkey = op.getarglist()[4:] repr = jd_sd.warmstate.get_location_str(greenkey) w_greenkey = wrap_greenkey(space, jd_sd.jitdriver, greenkey, repr) l_w.append(DebugMergePoint(space, jit_hooks._cast_to_gcref(op), diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py --- a/pypy/module/pypyjit/test/test_jit_hook.py +++ b/pypy/module/pypyjit/test/test_jit_hook.py @@ -55,7 +55,7 @@ oplist = parse(""" [i1, i2, p2] i3 = int_add(i1, i2) - debug_merge_point(0, 0, 0, 0, 0, ConstPtr(ptr0)) + debug_merge_point(0, 0, 0, 0, 0, 0, ConstPtr(ptr0)) guard_nonnull(p2) [] guard_true(i3) [] """, namespace={'ptr0': code_gcref}).operations diff --git a/rpython/bin/rpython-vmprof b/rpython/bin/rpython-vmprof new file mode 100755 --- /dev/null +++ b/rpython/bin/rpython-vmprof @@ -0,0 +1,28 @@ +#!/usr/bin/env pypy + +"""RPython translation usage: + +rpython <translation options> target <targetoptions> + +run with --help for more information +""" + +import sys, os +sys.path.insert(0, os.path.dirname(os.path.dirname( + 
os.path.dirname(os.path.realpath(__file__))))) +from rpython.translator.goal.translate import main + +# no implicit targets +if len(sys.argv) == 1: + print __doc__ + sys.exit(1) + +import _vmprof, subprocess +x = subprocess.Popen('gzip > vmprof.log.gz', shell=True, stdin=subprocess.PIPE) +_vmprof.enable(x.stdin.fileno(), 0.001) +try: + main() +finally: + _vmprof.disable() + x.stdin.close() + x.wait() diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py --- a/rpython/jit/backend/arm/assembler.py +++ b/rpython/jit/backend/arm/assembler.py @@ -102,7 +102,7 @@ self.store_reg(mc, r.r0, r.fp, ofs) mc.MOV_rr(r.r0.value, r.fp.value) self.gen_func_epilog(mc) - rawstart = mc.materialize(self.cpu.asmmemmgr, []) + rawstart = mc.materialize(self.cpu, []) self.propagate_exception_path = rawstart def _store_and_reset_exception(self, mc, excvalloc=None, exctploc=None, @@ -198,7 +198,7 @@ mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 2) * WORD) mc.B(self.propagate_exception_path) # - rawstart = mc.materialize(self.cpu.asmmemmgr, []) + rawstart = mc.materialize(self.cpu, []) self.stack_check_slowpath = rawstart def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False): @@ -255,7 +255,7 @@ # mc.POP([r.ip.value, r.pc.value]) # - rawstart = mc.materialize(self.cpu.asmmemmgr, []) + rawstart = mc.materialize(self.cpu, []) if for_frame: self.wb_slowpath[4] = rawstart else: @@ -276,7 +276,7 @@ callee_only) # return mc.POP([r.ip.value, r.pc.value]) - return mc.materialize(self.cpu.asmmemmgr, []) + return mc.materialize(self.cpu, []) def _build_malloc_slowpath(self, kind): """ While arriving on slowpath, we have a gcpattern on stack 0. 
@@ -352,7 +352,7 @@ mc.POP([r.ip.value, r.pc.value]) # - rawstart = mc.materialize(self.cpu.asmmemmgr, []) + rawstart = mc.materialize(self.cpu, []) return rawstart def _reload_frame_if_necessary(self, mc): @@ -473,7 +473,7 @@ mc.MOV_rr(r.r0.value, r.fp.value) # self.gen_func_epilog(mc) - rawstart = mc.materialize(self.cpu.asmmemmgr, []) + rawstart = mc.materialize(self.cpu, []) self.failure_recovery_code[exc + 2 * withfloats] = rawstart def generate_quick_failure(self, guardtok): @@ -851,7 +851,7 @@ # restore registers self._pop_all_regs_from_jitframe(mc, [], self.cpu.supports_floats) mc.POP([r.ip.value, r.pc.value]) # return - self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, []) + self._frame_realloc_slowpath = mc.materialize(self.cpu, []) def _load_shadowstack_top(self, mc, reg, gcrootmap): rst = gcrootmap.get_root_stack_top_addr() @@ -881,7 +881,7 @@ self.datablockwrapper = None allblocks = self.get_asmmemmgr_blocks(looptoken) size = self.mc.get_relative_pos() - res = self.mc.materialize(self.cpu.asmmemmgr, allblocks, + res = self.mc.materialize(self.cpu, allblocks, self.cpu.gc_ll_descr.gcrootmap) self.cpu.asmmemmgr.register_codemap( self.codemap.get_final_bytecode(res, size)) diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py --- a/rpython/jit/backend/arm/runner.py +++ b/rpython/jit/backend/arm/runner.py @@ -50,6 +50,7 @@ def setup_once(self): self.cpuinfo.arch_version = detect_arch_version() self.cpuinfo.hf_abi = detect_hardfloat() + self.codemap.setup() self.assembler.setup_once() def finish_once(self): diff --git a/rpython/jit/backend/arm/test/support.py b/rpython/jit/backend/arm/test/support.py --- a/rpython/jit/backend/arm/test/support.py +++ b/rpython/jit/backend/arm/test/support.py @@ -24,7 +24,7 @@ def run_asm(asm): BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed) - addr = asm.mc.materialize(asm.cpu.asmmemmgr, [], None) + addr = asm.mc.materialize(asm.cpu, [], None) assert addr % 8 == 0 func = 
rffi.cast(lltype.Ptr(BOOTSTRAP_TP), addr) asm.mc._dump_trace(addr, 'test.asm') diff --git a/rpython/jit/backend/arm/test/test_calling_convention.py b/rpython/jit/backend/arm/test/test_calling_convention.py --- a/rpython/jit/backend/arm/test/test_calling_convention.py +++ b/rpython/jit/backend/arm/test/test_calling_convention.py @@ -29,7 +29,7 @@ mc = InstrBuilder() mc.MOV_rr(r.r0.value, r.sp.value) mc.MOV_rr(r.pc.value, r.lr.value) - return mc.materialize(self.cpu.asmmemmgr, []) + return mc.materialize(self.cpu, []) def get_alignment_requirements(self): return 8 diff --git a/rpython/jit/backend/llsupport/asmmemmgr.py b/rpython/jit/backend/llsupport/asmmemmgr.py --- a/rpython/jit/backend/llsupport/asmmemmgr.py +++ b/rpython/jit/backend/llsupport/asmmemmgr.py @@ -5,9 +5,6 @@ from rpython.rlib.debug import debug_start, debug_print, debug_stop from rpython.rlib.debug import have_debug_prints from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.rlib.rbisect import bisect, bisect_tuple - -_memmngr = None # global reference so we can use @entrypoint :/ class AsmMemoryManager(object): @@ -27,12 +24,6 @@ self.free_blocks = {} # map {start: stop} self.free_blocks_end = {} # map {stop: start} self.blocks_by_size = [[] for i in range(self.num_indices)] - # two lists of jit addresses (sorted) and the corresponding stack - # depths - self.jit_addr_map = [] - self.jit_frame_depth_map = [] - self.jit_codemap = [] - # see codemap.py def malloc(self, minsize, maxsize): """Allocate executable memory, between minsize and maxsize bytes, @@ -54,13 +45,6 @@ if r_uint is not None: self.total_mallocs -= r_uint(stop - start) self._add_free_block(start, stop) - # fix up jit_addr_map - jit_adr_start = bisect(self.jit_addr_map, start) - jit_adr_stop = bisect(self.jit_addr_map, stop) - self.jit_addr_map = (self.jit_addr_map[:jit_adr_start] + - self.jit_addr_map[jit_adr_stop:]) - self.jit_frame_depth_map = (self.jit_frame_depth_map[:jit_adr_start] + - 
self.jit_frame_depth_map[jit_adr_stop:]) def open_malloc(self, minsize): """Allocate at least minsize bytes. Returns (start, stop).""" @@ -167,35 +151,6 @@ del self.free_blocks_end[stop] return (start, stop) - def register_frame_depth_map(self, rawstart, frame_positions, - frame_assignments): - if not frame_positions: - return - if not self.jit_addr_map or rawstart > self.jit_addr_map[-1]: - start = len(self.jit_addr_map) - self.jit_addr_map += [0] * len(frame_positions) - self.jit_frame_depth_map += [0] * len(frame_positions) - else: - start = bisect(self.jit_addr_map, rawstart) - self.jit_addr_map = (self.jit_addr_map[:start] + - [0] * len(frame_positions) + - self.jit_addr_map[start:]) - self.jit_frame_depth_map = (self.jit_frame_depth_map[:start] + - [0] * len(frame_positions) + - self.jit_frame_depth_map[start:]) - for i, pos in enumerate(frame_positions): - self.jit_addr_map[i + start] = pos + rawstart - self.jit_frame_depth_map[i + start] = frame_assignments[i] - - def register_codemap(self, codemap): - start = codemap[0] - pos = bisect_tuple(self.jit_codemap, start) - if pos == len(self.jit_codemap): # common case - self.jit_codemap.append(codemap) - else: - self.jit_codemap = (self.jit_codemap[:pos] + [codemap] + - self.jit_codemap[pos:]) - def _delete(self): "NOT_RPYTHON" if self._allocated: @@ -351,11 +306,11 @@ # debug_stop(logname) - def materialize(self, asmmemmgr, allblocks, gcrootmap=None): + def materialize(self, cpu, allblocks, gcrootmap=None): size = self.get_relative_pos() align = self.ALIGN_MATERIALIZE size += align - 1 - malloced = asmmemmgr.malloc(size, size) + malloced = cpu.asmmemmgr.malloc(size, size) allblocks.append(malloced) rawstart = malloced[0] rawstart = (rawstart + align - 1) & (-align) @@ -364,8 +319,9 @@ assert gcrootmap is not None for pos, mark in self.gcroot_markers: gcrootmap.register_asm_addr(rawstart + pos, mark) - asmmemmgr.register_frame_depth_map(rawstart, self.frame_positions, - self.frame_assignments) + 
cpu.codemap.register_frame_depth_map(rawstart, rawstart + size, + self.frame_positions, + self.frame_assignments) self.frame_positions = None self.frame_assignments = None return rawstart diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -130,7 +130,7 @@ self.gcmap_for_finish[0] = r_uint(1) def setup(self, looptoken): - self.codemap = CodemapBuilder() + self.codemap_builder = CodemapBuilder() self._finish_gcmap = lltype.nullptr(jitframe.GCMAP) def set_debug(self, v): @@ -200,7 +200,9 @@ return fail_descr, target def debug_merge_point(self, op): - self.codemap.debug_merge_point(op, self.mc.get_relative_pos()) + self.codemap_builder.debug_merge_point(op.getarg(1).getint(), + op.getarg(3).getint(), + self.mc.get_relative_pos()) def call_assembler(self, op, guard_op, argloc, vloc, result_loc, tmploc): self._store_force_index(guard_op) diff --git a/rpython/jit/backend/llsupport/codemap.py b/rpython/jit/backend/llsupport/codemap.py --- a/rpython/jit/backend/llsupport/codemap.py +++ b/rpython/jit/backend/llsupport/codemap.py @@ -9,79 +9,129 @@ """ +import os from rpython.rlib import rgc +from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.entrypoint import jit_entrypoint -from rpython.jit.backend.llsupport import asmmemmgr -from rpython.rlib.rbisect import bisect, bisect_tuple +from rpython.rlib.rbisect import bisect_right, bisect_right_addr +from rpython.rlib.rbisect import bisect_left, bisect_left_addr from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.translator.tool.cbuild import ExternalCompilationInfo +from rpython.translator import cdir -@jit_entrypoint([lltype.Signed], lltype.Signed, - c_name='pypy_jit_stack_depth_at_loc') -@rgc.no_collect -def stack_depth_at_loc(loc): - _memmngr = asmmemmgr._memmngr - pos = bisect(_memmngr.jit_addr_map, loc) - if pos == 0 or pos == 
len(_memmngr.jit_addr_map): - return -1 - return _memmngr.jit_frame_depth_map[pos-1] +INT_LIST_PTR = rffi.CArrayPtr(lltype.Signed) -@jit_entrypoint([], lltype.Signed, c_name='pypy_jit_start_addr') -def jit_start_addr(): - _memmngr = asmmemmgr._memmngr - return _memmngr.jit_addr_map[0] +srcdir = os.path.join(os.path.dirname(__file__), 'src') -@jit_entrypoint([], lltype.Signed, c_name='pypy_jit_end_addr') -def jit_end_addr(): - _memmngr = asmmemmgr._memmngr +eci = ExternalCompilationInfo(post_include_bits=[""" +#include <stdint.h> +RPY_EXTERN long pypy_jit_codemap_add(uintptr_t addr, + unsigned int machine_code_size, + long *bytecode_info, + unsigned int bytecode_info_size); +RPY_EXTERN long *pypy_jit_codemap_del(uintptr_t addr); +RPY_EXTERN uintptr_t pypy_jit_codemap_firstkey(void); +RPY_EXTERN void *pypy_find_codemap_at_addr(long addr, long* start_addr); +RPY_EXTERN long pypy_yield_codemap_at_addr(void *codemap_raw, long addr, + long *current_pos_addr); - return _memmngr.jit_addr_map[-1] +RPY_EXTERN long pypy_jit_depthmap_add(uintptr_t addr, unsigned int size, + unsigned int stackdepth); +RPY_EXTERN void pypy_jit_depthmap_clear(uintptr_t addr, unsigned int size); -@jit_entrypoint([lltype.Signed], lltype.Signed, - c_name='pypy_find_codemap_at_addr') -def find_codemap_at_addr(addr): - _memmngr = asmmemmgr._memmngr +"""], separate_module_sources=[ + open(os.path.join(srcdir, 'skiplist.c'), 'r').read() + + open(os.path.join(srcdir, 'codemap.c'), 'r').read() +], include_dirs=[cdir]) - res = bisect_tuple(_memmngr.jit_codemap, addr) - 1 - if res == len(_memmngr.jit_codemap): - return -1 - return res +def llexternal(name, args, res): + return rffi.llexternal(name, args, res, compilation_info=eci, + releasegil=False) -@jit_entrypoint([lltype.Signed, lltype.Signed, - rffi.CArrayPtr(lltype.Signed)], lltype.Signed, - c_name='pypy_yield_codemap_at_addr') -def yield_bytecode_at_addr(codemap_no, addr, current_pos_addr): - """ will return consecutive unique_ids from codemap, 
starting from position - `pos` until addr +pypy_jit_codemap_add = llexternal('pypy_jit_codemap_add', + [lltype.Signed, lltype.Signed, + INT_LIST_PTR, lltype.Signed], + lltype.Signed) +pypy_jit_codemap_del = llexternal('pypy_jit_codemap_del', + [lltype.Signed], INT_LIST_PTR) +pypy_jit_codemap_firstkey = llexternal('pypy_jit_codemap_firstkey', + [], lltype.Signed) + +pypy_jit_depthmap_add = llexternal('pypy_jit_depthmap_add', + [lltype.Signed, lltype.Signed, + lltype.Signed], lltype.Signed) +pypy_jit_depthmap_clear = llexternal('pypy_jit_depthmap_clear', + [lltype.Signed, lltype.Signed], + lltype.Void) + +stack_depth_at_loc = llexternal('pypy_jit_stack_depth_at_loc', + [lltype.Signed], lltype.Signed) +find_codemap_at_addr = llexternal('pypy_find_codemap_at_addr', + [lltype.Signed, rffi.CArrayPtr(lltype.Signed)], lltype.Signed) +yield_bytecode_at_addr = llexternal('pypy_yield_codemap_at_addr', + [lltype.Signed, lltype.Signed, + rffi.CArrayPtr(lltype.Signed)], + lltype.Signed) + + +class CodemapStorage(object): + """ An immortal wrapper around underlaying jit codemap data """ - _memmngr = asmmemmgr._memmngr + def setup(self): + if not we_are_translated(): + # in case someone failed to call free(), in tests only anyway + self.free() - codemap = _memmngr.jit_codemap[codemap_no] - current_pos = current_pos_addr[0] - start_addr = codemap[0] - rel_addr = addr - start_addr - while True: - if current_pos >= len(codemap[2]): - return 0 - next_start = codemap[2][current_pos + 1] - if next_start > rel_addr: - return 0 - next_stop = codemap[2][current_pos + 2] - if next_stop > rel_addr: - current_pos_addr[0] = current_pos + 4 - return codemap[2][current_pos] - # we need to skip potentially more than one - current_pos = codemap[2][current_pos + 3] + def free(self): + while True: + key = pypy_jit_codemap_firstkey() + if not key: + break + items = pypy_jit_codemap_del(key) + lltype.free(items, flavor='raw', track_allocation=False) + + def free_asm_block(self, start, stop): + items = 
pypy_jit_codemap_del(start) + if items: + lltype.free(items, flavor='raw', track_allocation=False) + pypy_jit_depthmap_clear(start, stop - start) + + def register_frame_depth_map(self, rawstart, rawstop, frame_positions, + frame_assignments): + if not frame_positions: + return + assert len(frame_positions) == len(frame_assignments) + for i in range(len(frame_positions)-1, -1, -1): + pos = rawstart + frame_positions[i] + length = rawstop - pos + if length > 0: + #print "ADD:", pos, length, frame_assignments[i] + pypy_jit_depthmap_add(pos, length, frame_assignments[i]) + rawstop = pos + + def register_codemap(self, (start, size, l)): + items = lltype.malloc(INT_LIST_PTR.TO, len(l), flavor='raw', + track_allocation=False) + for i in range(len(l)): + items[i] = l[i] + if pypy_jit_codemap_add(start, size, items, len(l)) < 0: + lltype.free(items, flavor='raw', track_allocation=False) + + def finish_once(self): + self.free() def unpack_traceback(addr): - codemap_pos = find_codemap_at_addr(addr) - assert codemap_pos >= 0 + codemap_raw = find_codemap_at_addr(addr, + lltype.nullptr(rffi.CArray(lltype.Signed))) + if not codemap_raw: + return [] # no codemap for that position storage = lltype.malloc(rffi.CArray(lltype.Signed), 1, flavor='raw') storage[0] = 0 res = [] while True: - item = yield_bytecode_at_addr(codemap_pos, addr, storage) + item = yield_bytecode_at_addr(codemap_raw, addr, storage) if item == 0: break res.append(item) @@ -95,14 +145,18 @@ self.patch_position = [] self.last_call_depth = -1 - def debug_merge_point(self, op, pos): - call_depth = op.getarg(1).getint() + def debug_merge_point(self, call_depth, unique_id, pos): if call_depth != self.last_call_depth: - unique_id = op.getarg(3).getint() if unique_id == 0: # uninteresting case return assert unique_id & 1 == 0 if call_depth > self.last_call_depth: + assert call_depth == self.last_call_depth + 1 + # ^^^ It should never be the case that we see + # debug_merge_points that suddenly go more than *one* + # call 
deeper than the previous one (unless we're at + # the start of a bridge, handled by + # inherit_code_from_position()). self.l.append(unique_id) self.l.append(pos) # <- this is a relative pos self.patch_position.append(len(self.l)) @@ -139,4 +193,3 @@ item = self.l[i * 4 + 3] # end in l assert item > 0 return (addr, size, self.l) # XXX compact self.l - diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py --- a/rpython/jit/backend/llsupport/llmodel.py +++ b/rpython/jit/backend/llsupport/llmodel.py @@ -16,7 +16,7 @@ FieldDescr, ArrayDescr, CallDescr, InteriorFieldDescr, FLAG_POINTER, FLAG_FLOAT) from rpython.jit.backend.llsupport.memcpy import memset_fn -from rpython.jit.backend.llsupport import asmmemmgr +from rpython.jit.backend.llsupport import asmmemmgr, codemap from rpython.rlib.unroll import unrolling_iterable @@ -49,7 +49,7 @@ else: self._setup_exception_handling_untranslated() self.asmmemmgr = asmmemmgr.AsmMemoryManager() - asmmemmgr._memmngr = self.asmmemmgr + self.codemap = codemap.CodemapStorage() self._setup_frame_realloc(translate_support_code) ad = self.gc_ll_descr.getframedescrs(self).arraydescr self.signedarraydescr = ad @@ -79,6 +79,9 @@ def setup(self): pass + def finish_once(self): + self.codemap.finish_once() + def _setup_frame_realloc(self, translate_support_code): FUNC_TP = lltype.Ptr(lltype.FuncType([llmemory.GCREF, lltype.Signed], llmemory.GCREF)) @@ -213,6 +216,7 @@ for rawstart, rawstop in blocks: self.gc_ll_descr.freeing_block(rawstart, rawstop) self.asmmemmgr.free(rawstart, rawstop) + self.codemap.free_asm_block(rawstart, rawstop) def force(self, addr_of_force_token): frame = rffi.cast(jitframe.JITFRAMEPTR, addr_of_force_token) diff --git a/rpython/jit/backend/llsupport/src/codemap.c b/rpython/jit/backend/llsupport/src/codemap.c new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/llsupport/src/codemap.c @@ -0,0 +1,204 @@ +#include "src/precommondefs.h" + +#ifndef HAS_SKIPLIST +# error 
"skiplist.c needs to be included before" +#endif + +volatile int pypy_codemap_currently_invalid = 0; + +void pypy_codemap_invalid_set(int value) +{ + if (value) + __sync_lock_test_and_set(&pypy_codemap_currently_invalid, 1); + else + __sync_lock_release(&pypy_codemap_currently_invalid); +} + + +/************************************************************/ +/*** codemap storage ***/ +/************************************************************/ + +typedef struct { + unsigned int machine_code_size; + unsigned int bytecode_info_size; + long *bytecode_info; +} codemap_data_t; + +static skipnode_t jit_codemap_head; + +/*** interface used from codemap.py ***/ + +RPY_EXTERN +long pypy_jit_codemap_add(uintptr_t addr, unsigned int machine_code_size, + long *bytecode_info, unsigned int bytecode_info_size) +{ + skipnode_t *new = skiplist_malloc(sizeof(codemap_data_t)); + codemap_data_t *data; + if (new == NULL) + return -1; /* too bad */ + + new->key = addr; + data = (codemap_data_t *)new->data; + data->machine_code_size = machine_code_size; + data->bytecode_info = bytecode_info; + data->bytecode_info_size = bytecode_info_size; + + pypy_codemap_invalid_set(1); + skiplist_insert(&jit_codemap_head, new); + pypy_codemap_invalid_set(0); + return 0; +} + +RPY_EXTERN +long *pypy_jit_codemap_del(uintptr_t addr) +{ + long *result; + skipnode_t *node; + + pypy_codemap_invalid_set(1); + node = skiplist_remove(&jit_codemap_head, addr); + pypy_codemap_invalid_set(0); + + if (node == NULL) + return NULL; + result = ((codemap_data_t *)node->data)->bytecode_info; + free(node); + return result; +} + +RPY_EXTERN +uintptr_t pypy_jit_codemap_firstkey(void) +{ + return skiplist_firstkey(&jit_codemap_head); +} + +/*** interface used from pypy/module/_vmprof ***/ + +RPY_EXTERN +void *pypy_find_codemap_at_addr(long addr, long* start_addr) +{ + skipnode_t *codemap = skiplist_search(&jit_codemap_head, addr); + codemap_data_t *data; + uintptr_t rel_addr; + + if (codemap == &jit_codemap_head) { + if 
(start_addr) + *start_addr = 0; + return NULL; + } + + rel_addr = (uintptr_t)addr - codemap->key; + data = (codemap_data_t *)codemap->data; + if (rel_addr >= data->machine_code_size) { + if (start_addr) + *start_addr = 0; + return NULL; + } + + if (start_addr) + *start_addr = (long)codemap->key; + return (void *)codemap; +} + +RPY_EXTERN +long pypy_yield_codemap_at_addr(void *codemap_raw, long addr, + long *current_pos_addr) +{ + // will return consecutive unique_ids from codemap, starting from position + // `pos` until addr + skipnode_t *codemap = (skipnode_t *)codemap_raw; + long current_pos = *current_pos_addr; + long rel_addr = addr - codemap->key; + long next_start, next_stop; + codemap_data_t *data = (codemap_data_t *)codemap->data; + + while (1) { + if (current_pos >= data->bytecode_info_size) + return 0; + next_start = data->bytecode_info[current_pos + 1]; + if (next_start > rel_addr) + return 0; + next_stop = data->bytecode_info[current_pos + 2]; + if (next_stop > rel_addr) { + *current_pos_addr = current_pos + 4; + return data->bytecode_info[current_pos]; + } + // we need to skip potentially more than one + current_pos = data->bytecode_info[current_pos + 3]; + } +} + +/************************************************************/ +/*** depthmap storage ***/ +/************************************************************/ + +typedef struct { + unsigned int block_size; + unsigned int stack_depth; +} depthmap_data_t; + +static skipnode_t jit_depthmap_head; + +/*** interface used from codemap.py ***/ + +RPY_EXTERN +long pypy_jit_depthmap_add(uintptr_t addr, unsigned int size, + unsigned int stackdepth) +{ + skipnode_t *new = skiplist_malloc(sizeof(depthmap_data_t)); + depthmap_data_t *data; + if (new == NULL) + return -1; /* too bad */ + + new->key = addr; + data = (depthmap_data_t *)new->data; + data->block_size = size; + data->stack_depth = stackdepth; + + pypy_codemap_invalid_set(1); + skiplist_insert(&jit_depthmap_head, new); + pypy_codemap_invalid_set(0); 
+ return 0; +} + +RPY_EXTERN +void pypy_jit_depthmap_clear(uintptr_t addr, unsigned int size) +{ + uintptr_t search_key = addr + size - 1; + if (size == 0) + return; + + pypy_codemap_invalid_set(1); + while (1) { + /* search for all nodes belonging to the range, and remove them */ + skipnode_t *node = skiplist_search(&jit_depthmap_head, search_key); + if (node->key < addr) + break; /* exhausted */ + skiplist_remove(&jit_depthmap_head, node->key); + free(node); + } + pypy_codemap_invalid_set(0); +} + +/*** interface used from pypy/module/_vmprof ***/ + +RPY_EXTERN +long pypy_jit_stack_depth_at_loc(long loc) +{ + skipnode_t *depthmap = skiplist_search(&jit_depthmap_head, (uintptr_t)loc); + depthmap_data_t *data; + uintptr_t rel_addr; + + if (depthmap == &jit_depthmap_head) + return -1; + + rel_addr = (uintptr_t)loc - depthmap->key; + data = (codemap_data_t *)depthmap->data; + if (rel_addr >= data->block_size) + return -1; + + return data->stack_depth; +} + +/************************************************************/ diff --git a/rpython/jit/backend/llsupport/src/skiplist.c b/rpython/jit/backend/llsupport/src/skiplist.c new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/llsupport/src/skiplist.c @@ -0,0 +1,103 @@ +#include <stdlib.h> +#include <stdint.h> + +#define HAS_SKIPLIST +#define SKIPLIST_HEIGHT 8 + +typedef struct skipnode_s { + uintptr_t key; + char *data; + struct skipnode_s *next[SKIPLIST_HEIGHT]; /* may be smaller */ +} skipnode_t; + +static skipnode_t *skiplist_malloc(uintptr_t datasize) +{ + char *result; + uintptr_t basesize; + uintptr_t length = 1; + while (length < SKIPLIST_HEIGHT && (rand() & 3) == 0) + length++; + basesize = sizeof(skipnode_t) - + (SKIPLIST_HEIGHT - length) * sizeof(skipnode_t *); + result = malloc(basesize + datasize); + if (result != NULL) { + ((skipnode_t *)result)->data = result + basesize; + } + return (skipnode_t *)result; +} + +static skipnode_t *skiplist_search(skipnode_t *head, uintptr_t searchkey) +{ + /* 
Returns the skipnode with key closest (but <=) searchkey. + Note that if there is no item with key <= searchkey in the list, + this will return the head node. */ + uintptr_t level = SKIPLIST_HEIGHT - 1; + while (1) { + skipnode_t *next = head->next[level]; + if (next != NULL && next->key <= searchkey) { + head = next; + } + else { + if (level == 0) + break; + level -= 1; + } + } + return head; +} + +static void skiplist_insert(skipnode_t *head, skipnode_t *new) +{ + uintptr_t size0 = sizeof(skipnode_t) - + SKIPLIST_HEIGHT * sizeof(skipnode_t *); + uintptr_t height_of_new = (new->data - ((char *)new + size0)) / + sizeof(skipnode_t *); + + uintptr_t level = SKIPLIST_HEIGHT - 1; + uintptr_t searchkey = new->key; + while (1) { + skipnode_t *next = head->next[level]; + if (next != NULL && next->key <= searchkey) { + head = next; + } + else { + if (level < height_of_new) { + new->next[level] = next; + head->next[level] = new; + if (level == 0) + break; + } + level -= 1; + } + } +} + +static skipnode_t *skiplist_remove(skipnode_t *head, uintptr_t exact_key) +{ + uintptr_t level = SKIPLIST_HEIGHT - 1; + while (1) { + skipnode_t *next = head->next[level]; + if (next != NULL && next->key <= exact_key) { + if (next->key == exact_key) { + head->next[level] = next->next[level]; + if (level == 0) + return next; /* successfully removed */ + level -= 1; + } + else + head = next; + } + else { + if (level == 0) + return NULL; /* 'exact_key' not found! 
*/ + level -= 1; + } + } +} + +static uintptr_t skiplist_firstkey(skipnode_t *head) +{ + if (head->next[0] == NULL) + return 0; + return head->next[0]->key; +} diff --git a/rpython/jit/backend/llsupport/test/test_asmmemmgr.py b/rpython/jit/backend/llsupport/test/test_asmmemmgr.py --- a/rpython/jit/backend/llsupport/test/test_asmmemmgr.py +++ b/rpython/jit/backend/llsupport/test/test_asmmemmgr.py @@ -2,7 +2,7 @@ from rpython.jit.backend.llsupport.asmmemmgr import AsmMemoryManager from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin -from rpython.jit.backend.llsupport import asmmemmgr +from rpython.jit.backend.llsupport.codemap import CodemapStorage from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rlib import debug @@ -96,20 +96,21 @@ class TestAsmMemoryManager: def setup_method(self, _): - self.memmgr = AsmMemoryManager(min_fragment=8, + self.asmmemmgr = AsmMemoryManager(min_fragment=8, num_indices=10, large_alloc_size=8192) + self.codemap = CodemapStorage() def teardown_method(self, _): - self.memmgr._delete() + self.asmmemmgr._delete() def test_malloc_simple(self): for i in range(100): - while self.memmgr.total_memory_allocated < 16384: + while self.asmmemmgr.total_memory_allocated < 16384: reqsize = random.randrange(1, 200) - (start, stop) = self.memmgr.malloc(reqsize, reqsize) + (start, stop) = self.asmmemmgr.malloc(reqsize, reqsize) assert reqsize <= stop - start < reqsize + 8 - assert self.memmgr.total_memory_allocated in [8192, 16384] + assert self.asmmemmgr.total_memory_allocated in [8192, 16384] self.teardown_method(None) self.setup_method(None) @@ -123,7 +124,7 @@ if got and (random.random() < 0.4 or len(got) == 1000): # free start, stop = got.pop(random.randrange(0, len(got))) - self.memmgr.free(start, stop) + self.asmmemmgr.free(start, stop) real_use -= (stop - start) assert real_use >= 0 # @@ -134,18 +135,18 @@ reqmaxsize = reqsize else: 
reqmaxsize = reqsize + random.randrange(0, 200) - (start, stop) = self.memmgr.malloc(reqsize, reqmaxsize) + (start, stop) = self.asmmemmgr.malloc(reqsize, reqmaxsize) assert reqsize <= stop - start < reqmaxsize + 8 for otherstart, otherstop in got: # no overlap assert otherstop <= start or stop <= otherstart got.append((start, stop)) real_use += (stop - start) - if self.memmgr.total_memory_allocated == prev_total: + if self.asmmemmgr.total_memory_allocated == prev_total: iterations_without_allocating_more += 1 if iterations_without_allocating_more == 40000: break # ok else: - new_total = self.memmgr.total_memory_allocated + new_total = self.asmmemmgr.total_memory_allocated iterations_without_allocating_more = 0 print real_use, new_total # We seem to never see a printed value greater @@ -172,7 +173,7 @@ # gcrootmap = FakeGcRootMap() allblocks = [] - rawstart = mc.materialize(self.memmgr, allblocks, gcrootmap) + rawstart = mc.materialize(self, allblocks, gcrootmap) p = rffi.cast(rffi.CArrayPtr(lltype.Char), rawstart) assert p[0] == 'X' assert p[1] == 'x' @@ -268,16 +269,3 @@ md.done() assert allblocks == [(1597, 1697), (1797, 1835)] assert ops == [('free', 1835, 1897)] - -def test_find_jit_frame_depth(): - mgr = AsmMemoryManager() - mgr.register_frame_depth_map(11, [0, 5, 10], [1, 2, 3]) - mgr.register_frame_depth_map(30, [0, 5, 10], [4, 5, 6]) - mgr.register_frame_depth_map(0, [0, 5, 10], [7, 8, 9]) - asmmemmgr._memmngr = mgr - assert asmmemmgr.stack_depth_at_loc(13) == 1 - assert asmmemmgr.stack_depth_at_loc(-3) == -1 - assert asmmemmgr.stack_depth_at_loc(41) == -1 - assert asmmemmgr.stack_depth_at_loc(5) == 8 - assert asmmemmgr.stack_depth_at_loc(17) == 2 - assert asmmemmgr.stack_depth_at_loc(38) == 5 diff --git a/rpython/jit/backend/llsupport/test/test_codemap.py b/rpython/jit/backend/llsupport/test/test_codemap.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/llsupport/test/test_codemap.py @@ -0,0 +1,93 @@ + +from rpython.rtyper.lltypesystem 
import rffi, lltype +from rpython.jit.backend.llsupport.codemap import stack_depth_at_loc +from rpython.jit.backend.llsupport.codemap import CodemapStorage, \ + CodemapBuilder, unpack_traceback, find_codemap_at_addr + +NULL = lltype.nullptr(rffi.CArray(lltype.Signed)) + +def test_register_codemap(): + codemap = CodemapStorage() + codemap.setup() + codemap.register_codemap((100, 20, [13, 14, 15])) + codemap.register_codemap((300, 30, [16, 17, 18])) + codemap.register_codemap((200, 100, [19, 20, 21, 22, 23])) + # + raw100 = find_codemap_at_addr(100, NULL) + assert find_codemap_at_addr(119, NULL) == raw100 + assert not find_codemap_at_addr(120, NULL) + # + raw200 = find_codemap_at_addr(200, NULL) + assert raw200 != raw100 + assert find_codemap_at_addr(299, NULL) == raw200 + # + raw300 = find_codemap_at_addr(329, NULL) + assert raw300 != raw100 and raw300 != raw200 + assert find_codemap_at_addr(300, NULL) == raw300 + # + codemap.free() + +def test_find_jit_frame_depth(): + codemap = CodemapStorage() + codemap.setup() + codemap.register_frame_depth_map(11, 26, [0, 5, 10], [1, 2, 3]) + codemap.register_frame_depth_map(30, 41, [0, 5, 10], [4, 5, 6]) + codemap.register_frame_depth_map(0, 11, [0, 5, 10], [7, 8, 9]) + assert stack_depth_at_loc(13) == 1 + assert stack_depth_at_loc(-3) == -1 + assert stack_depth_at_loc(40) == 6 + assert stack_depth_at_loc(41) == -1 + assert stack_depth_at_loc(5) == 8 + assert stack_depth_at_loc(17) == 2 + assert stack_depth_at_loc(38) == 5 + assert stack_depth_at_loc(25) == 3 + assert stack_depth_at_loc(26) == -1 + assert stack_depth_at_loc(11) == 1 + assert stack_depth_at_loc(10) == 9 + codemap.free_asm_block(11, 26) + assert stack_depth_at_loc(11) == -1 + assert stack_depth_at_loc(13) == -1 + assert stack_depth_at_loc(-3) == -1 + assert stack_depth_at_loc(40) == 6 + assert stack_depth_at_loc(41) == -1 + assert stack_depth_at_loc(5) == 8 + assert stack_depth_at_loc(38) == 5 + assert stack_depth_at_loc(10) == 9 + codemap.free() + +def 
test_codemaps(): + builder = CodemapBuilder() + builder.debug_merge_point(0, 102, 0) + builder.debug_merge_point(0, 102, 13) + builder.debug_merge_point(1, 104, 15) + builder.debug_merge_point(1, 104, 16) + builder.debug_merge_point(2, 106, 20) + builder.debug_merge_point(2, 106, 25) + builder.debug_merge_point(1, 104, 30) + builder.debug_merge_point(0, 102, 35) + codemap = CodemapStorage() + codemap.setup() + codemap.register_codemap(builder.get_final_bytecode(100, 40)) + builder = CodemapBuilder() + builder.debug_merge_point(0, 202, 0) + builder.debug_merge_point(0, 202, 10) + builder.debug_merge_point(1, 204, 20) + builder.debug_merge_point(1, 204, 30) + builder.debug_merge_point(2, 206, 40) + builder.debug_merge_point(2, 206, 50) + builder.debug_merge_point(1, 204, 60) + builder.debug_merge_point(0, 202, 70) + codemap.register_codemap(builder.get_final_bytecode(200, 100)) + assert unpack_traceback(110) == [102] + assert unpack_traceback(117) == [102, 104] + assert unpack_traceback(121) == [102, 104, 106] + assert unpack_traceback(131) == [102, 104] + assert unpack_traceback(137) == [102] + assert unpack_traceback(205) == [202] + assert unpack_traceback(225) == [202, 204] + assert unpack_traceback(245) == [202, 204, 206] + assert unpack_traceback(265) == [202, 204] + assert unpack_traceback(275) == [202] + codemap.free_asm_block(200, 300) + assert unpack_traceback(225) == [] _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit