Author: fijal Branch: Changeset: r82078:cc28605e84eb Date: 2016-02-04 20:44 +0100 http://bitbucket.org/pypy/pypy/changeset/cc28605e84eb/
Log: merge vmprof-newstack diff --git a/pypy/module/_vmprof/__init__.py b/pypy/module/_vmprof/__init__.py --- a/pypy/module/_vmprof/__init__.py +++ b/pypy/module/_vmprof/__init__.py @@ -11,6 +11,7 @@ interpleveldefs = { 'enable': 'interp_vmprof.enable', 'disable': 'interp_vmprof.disable', + 'write_all_code_objects': 'interp_vmprof.write_all_code_objects', 'VMProfError': 'space.fromcache(interp_vmprof.Cache).w_VMProfError', } diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py --- a/pypy/module/_vmprof/interp_vmprof.py +++ b/pypy/module/_vmprof/interp_vmprof.py @@ -59,11 +59,21 @@ 'interval' is a float representing the sampling interval, in seconds. Must be smaller than 1.0 """ + w_modules = space.sys.get('modules') + if space.is_true(space.contains(w_modules, space.wrap('_continuation'))): + space.warn(space.wrap("Using _continuation/greenlet/stacklet together " + "with vmprof will crash"), + space.w_RuntimeWarning) try: rvmprof.enable(fileno, period) except rvmprof.VMProfError, e: raise VMProfError(space, e) +def write_all_code_objects(space): + """ Needed on cpython, just empty function here + """ + pass + def disable(space): """Disable vmprof. Remember to close the file descriptor afterwards if necessary. 
diff --git a/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py b/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py @@ -0,0 +1,86 @@ + +import os, py +from rpython.jit.backend.test.support import CCompiledMixin +from rpython.rlib.jit import JitDriver +from rpython.tool.udir import udir +from rpython.translator.translator import TranslationContext +from rpython.jit.backend.detect_cpu import getcpuclass + +class CompiledVmprofTest(CCompiledMixin): + CPUClass = getcpuclass() + + def _get_TranslationContext(self): + t = TranslationContext() + t.config.translation.gc = 'incminimark' + t.config.translation.list_comprehension_operations = True + return t + + def test_vmprof(self): + from rpython.rlib import rvmprof + + class MyCode: + _vmprof_unique_id = 0 + def __init__(self, name): + self.name = name + + def get_name(code): + return code.name + + code2 = MyCode("py:y:foo:4") + rvmprof.register_code(code2, get_name) + + try: + rvmprof.register_code_object_class(MyCode, get_name) + except rvmprof.VMProfPlatformUnsupported, e: + py.test.skip(str(e)) + + def get_unique_id(code): + return rvmprof.get_unique_id(code) + + driver = JitDriver(greens = ['code'], reds = ['i', 's', 'num'], + is_recursive=True, get_unique_id=get_unique_id) + + @rvmprof.vmprof_execute_code("xcode13", lambda code, num: code) + def main(code, num): + return main_jitted(code, num) + + def main_jitted(code, num): + s = 0 + i = 0 + while i < num: + driver.jit_merge_point(code=code, i=i, s=s, num=num) + s += (i << 1) + if i % 3 == 0 and code is not code2: + main(code2, 100) + i += 1 + return s + + tmpfilename = str(udir.join('test_rvmprof')) + + def f(num): + code = MyCode("py:x:foo:3") + rvmprof.register_code(code, get_name) + fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) + period = 0.0001 + rvmprof.enable(fd, period) + res = main(code, num) + #assert res == 499999500000 + rvmprof.disable() 
+ os.close(fd) + return 0 + + def check_vmprof_output(): + from vmprof import read_profile + tmpfile = str(udir.join('test_rvmprof')) + stats = read_profile(tmpfile) + t = stats.get_tree() + assert t.name == 'py:x:foo:3' + assert len(t.children) == 1 # jit + + self.meta_interp(f, [1000000], inline=True) + try: + import vmprof + except ImportError: + pass + else: + check_vmprof_output() \ No newline at end of file diff --git a/rpython/jit/backend/test/test_rvmprof.py b/rpython/jit/backend/test/test_rvmprof.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/test/test_rvmprof.py @@ -0,0 +1,49 @@ +import py +from rpython.rlib import jit +from rpython.rtyper.annlowlevel import llhelper +from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib.rvmprof import cintf +from rpython.jit.backend.x86.arch import WORD +from rpython.jit.codewriter.policy import JitPolicy + +class BaseRVMProfTest(object): + def test_one(self): + py.test.skip("needs thread-locals in the JIT, which is only available " + "after translation") + visited = [] + + def helper(): + stack = cintf.vmprof_tl_stack.getraw() + if stack: + # not during tracing + visited.append(stack.c_value) + else: + visited.append(0) + + llfn = llhelper(lltype.Ptr(lltype.FuncType([], lltype.Void)), helper) + + driver = jit.JitDriver(greens=[], reds='auto') + + def f(n): + i = 0 + while i < n: + driver.jit_merge_point() + i += 1 + llfn() + + class Hooks(jit.JitHookInterface): + def after_compile(self, debug_info): + self.raw_start = debug_info.asminfo.rawstart + + hooks = Hooks() + + null = lltype.nullptr(cintf.VMPROFSTACK) + cintf.vmprof_tl_stack.setraw(null) # make it empty + self.meta_interp(f, [10], policy=JitPolicy(hooks)) + v = set(visited) + assert 0 in v + v.remove(0) + assert len(v) == 1 + assert 0 <= list(v)[0] - hooks.raw_start <= 10*1024 + assert cintf.vmprof_tl_stack.getraw() == null + # ^^^ make sure we didn't leave anything dangling diff --git a/rpython/jit/backend/x86/arch.py 
b/rpython/jit/backend/x86/arch.py --- a/rpython/jit/backend/x86/arch.py +++ b/rpython/jit/backend/x86/arch.py @@ -31,7 +31,7 @@ if WORD == 4: # ebp + ebx + esi + edi + 15 extra words = 19 words - FRAME_FIXED_SIZE = 19 + FRAME_FIXED_SIZE = 19 + 4 # 4 for vmprof, XXX make more compact! PASS_ON_MY_FRAME = 15 JITFRAME_FIXED_SIZE = 6 + 8 * 2 # 6 GPR + 8 XMM * 2 WORDS/float # 'threadlocal_addr' is passed as 2nd argument on the stack, @@ -41,7 +41,7 @@ THREADLOCAL_OFS = (FRAME_FIXED_SIZE + 2) * WORD else: # rbp + rbx + r12 + r13 + r14 + r15 + threadlocal + 12 extra words = 19 - FRAME_FIXED_SIZE = 19 + FRAME_FIXED_SIZE = 19 + 4 # 4 for vmprof, XXX make more compact! PASS_ON_MY_FRAME = 12 JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM # 'threadlocal_addr' is passed as 2nd argument in %esi, diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -12,7 +12,7 @@ from rpython.jit.metainterp.compile import ResumeGuardDescr from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory from rpython.rtyper.lltypesystem.lloperation import llop -from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref +from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.rtyper import rclass from rpython.rlib.jit import AsmInfo from rpython.jit.backend.model import CompiledLoopToken @@ -837,11 +837,56 @@ frame_depth = max(frame_depth, target_frame_depth) return frame_depth + def _call_header_vmprof(self): + from rpython.rlib.rvmprof.rvmprof import cintf, VMPROF_JITTED_TAG + + # tloc = address of pypy_threadlocal_s + if IS_X86_32: + # Can't use esi here, its old value is not saved yet. + # But we can use eax and ecx. + self.mc.MOV_rs(edx.value, THREADLOCAL_OFS) + tloc = edx + old = ecx + else: + # The thread-local value is already in esi. 
+ # We should avoid if possible to use ecx or edx because they + # would be used to pass arguments #3 and #4 (even though, so + # far, the assembler only receives two arguments). + tloc = esi + old = r11 + # eax = address in the stack of a 3-words struct vmprof_stack_s + self.mc.LEA_rs(eax.value, (FRAME_FIXED_SIZE - 4) * WORD) + # old = current value of vmprof_tl_stack + offset = cintf.vmprof_tl_stack.getoffset() + self.mc.MOV_rm(old.value, (tloc.value, offset)) + # eax->next = old + self.mc.MOV_mr((eax.value, 0), old.value) + # eax->value = my esp + self.mc.MOV_mr((eax.value, WORD), esp.value) + # eax->kind = VMPROF_JITTED_TAG + self.mc.MOV_mi((eax.value, WORD * 2), VMPROF_JITTED_TAG) + # save in vmprof_tl_stack the new eax + self.mc.MOV_mr((tloc.value, offset), eax.value) + + def _call_footer_vmprof(self): + from rpython.rlib.rvmprof.rvmprof import cintf + # edx = address of pypy_threadlocal_s + self.mc.MOV_rs(edx.value, THREADLOCAL_OFS) + self.mc.AND_ri(edx.value, ~1) + # eax = (our local vmprof_tl_stack).next + self.mc.MOV_rs(eax.value, (FRAME_FIXED_SIZE - 4 + 0) * WORD) + # save in vmprof_tl_stack the value eax + offset = cintf.vmprof_tl_stack.getoffset() + self.mc.MOV_mr((edx.value, offset), eax.value) + def _call_header(self): self.mc.SUB_ri(esp.value, FRAME_FIXED_SIZE * WORD) self.mc.MOV_sr(PASS_ON_MY_FRAME * WORD, ebp.value) if IS_X86_64: self.mc.MOV_sr(THREADLOCAL_OFS, esi.value) + if self.cpu.translate_support_code: + self._call_header_vmprof() # on X86_64, this uses esi + if IS_X86_64: self.mc.MOV_rr(ebp.value, edi.value) else: self.mc.MOV_rs(ebp.value, (FRAME_FIXED_SIZE + 1) * WORD) @@ -873,6 +918,8 @@ def _call_footer(self): # the return value is the jitframe + if self.cpu.translate_support_code: + self._call_footer_vmprof() self.mc.MOV_rr(eax.value, ebp.value) gcrootmap = self.cpu.gc_ll_descr.gcrootmap diff --git a/rpython/jit/backend/x86/test/test_rvmprof.py b/rpython/jit/backend/x86/test/test_rvmprof.py new file mode 100644 --- /dev/null +++ 
b/rpython/jit/backend/x86/test/test_rvmprof.py @@ -0,0 +1,7 @@ + +import py +from rpython.jit.backend.test.test_rvmprof import BaseRVMProfTest +from rpython.jit.backend.x86.test.test_basic import Jit386Mixin + +class TestFfiCall(Jit386Mixin, BaseRVMProfTest): + pass \ No newline at end of file diff --git a/rpython/jit/backend/x86/test/test_zrpy_vmprof.py b/rpython/jit/backend/x86/test/test_zrpy_vmprof.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/x86/test/test_zrpy_vmprof.py @@ -0,0 +1,7 @@ + +from rpython.jit.backend.llsupport.test.zrpy_vmprof_test import CompiledVmprofTest + +class TestZVMprof(CompiledVmprofTest): + + gcrootfinder = "shadowstack" + gc = "incminimark" \ No newline at end of file diff --git a/rpython/jit/backend/x86/test/test_zvmprof.py b/rpython/jit/backend/x86/test/test_zvmprof.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/x86/test/test_zvmprof.py @@ -0,0 +1,7 @@ + +from rpython.jit.backend.llsupport.test.zrpy_vmprof_test import CompiledVmprofTest + +class TestZVMprof(CompiledVmprofTest): + + gcrootfinder = "shadowstack" + gc = "incminimark" \ No newline at end of file diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py --- a/rpython/jit/codewriter/test/test_jtransform.py +++ b/rpython/jit/codewriter/test/test_jtransform.py @@ -1332,7 +1332,7 @@ tlfield = ThreadLocalField(lltype.Signed, 'foobar_test_', loop_invariant=loop_inv) OS_THREADLOCALREF_GET = effectinfo.EffectInfo.OS_THREADLOCALREF_GET - c = const(tlfield.offset) + c = const(tlfield.getoffset()) v = varoftype(lltype.Signed) op = SpaceOperation('threadlocalref_get', [c], v) cc = FakeBuiltinCallControl() diff --git a/rpython/jit/metainterp/quasiimmut.py b/rpython/jit/metainterp/quasiimmut.py --- a/rpython/jit/metainterp/quasiimmut.py +++ b/rpython/jit/metainterp/quasiimmut.py @@ -51,6 +51,7 @@ class QuasiImmut(object): llopaque = True compress_limit = 30 + looptokens_wrefs = None def __init__(self, 
cpu): self.cpu = cpu @@ -75,7 +76,7 @@ def compress_looptokens_list(self): self.looptokens_wrefs = [wref for wref in self.looptokens_wrefs if wref() is not None] - # NB. we must keep around the looptoken_wrefs that are + # NB. we must keep around the looptokens_wrefs that are # already invalidated; see below self.compress_limit = (len(self.looptokens_wrefs) + 15) * 2 @@ -83,6 +84,9 @@ # When this is called, all the loops that we record become # invalid: all GUARD_NOT_INVALIDATED in these loops (and # in attached bridges) must now fail. + if self.looptokens_wrefs is None: + # can't happen, but helps compiled tests + return wrefs = self.looptokens_wrefs self.looptokens_wrefs = [] for wref in wrefs: diff --git a/rpython/jit/metainterp/test/test_jitdriver.py b/rpython/jit/metainterp/test/test_jitdriver.py --- a/rpython/jit/metainterp/test/test_jitdriver.py +++ b/rpython/jit/metainterp/test/test_jitdriver.py @@ -193,7 +193,7 @@ return pc + 1 driver = JitDriver(greens=["pc"], reds='auto', - get_unique_id=get_unique_id) + get_unique_id=get_unique_id, is_recursive=True) def f(arg): i = 0 diff --git a/rpython/jit/metainterp/test/test_recursive.py b/rpython/jit/metainterp/test/test_recursive.py --- a/rpython/jit/metainterp/test/test_recursive.py +++ b/rpython/jit/metainterp/test/test_recursive.py @@ -1312,7 +1312,7 @@ return (code + 1) * 2 driver = JitDriver(greens=["pc", "code"], reds='auto', - get_unique_id=get_unique_id) + get_unique_id=get_unique_id, is_recursive=True) def f(pc, code): i = 0 diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py --- a/rpython/rlib/jit.py +++ b/rpython/rlib/jit.py @@ -623,6 +623,8 @@ raise AttributeError("no 'greens' or 'reds' supplied") if virtualizables is not None: self.virtualizables = virtualizables + if get_unique_id is not None: + assert is_recursive, "get_unique_id and is_recursive must be specified at the same time" for v in self.virtualizables: assert v in self.reds # if reds are automatic, they won't be passed to 
jit_merge_point, so diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py --- a/rpython/rlib/rthread.py +++ b/rpython/rlib/rthread.py @@ -308,7 +308,7 @@ offset = CDefinedIntSymbolic('RPY_TLOFS_%s' % self.fieldname, default='?') offset.loop_invariant = loop_invariant - self.offset = offset + self._offset = offset def getraw(): if we_are_translated(): @@ -364,7 +364,7 @@ ThreadLocalField.__init__(self, lltype.Signed, 'tlref%d' % unique_id, loop_invariant=loop_invariant) setraw = self.setraw - offset = self.offset + offset = self._offset def get(): if we_are_translated(): diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -5,41 +5,41 @@ from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.tool import rffi_platform as platform +from rpython.rlib import rthread from rpython.jit.backend import detect_cpu class VMProfPlatformUnsupported(Exception): pass +ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') +SRC = ROOT.join('src') + +if sys.platform.startswith('linux'): + _libs = ['dl'] +else: + _libs = [] +eci_kwds = dict( + include_dirs = [SRC], + includes = ['rvmprof.h'], + libraries = _libs, + separate_module_files = [SRC.join('rvmprof.c')], + post_include_bits=['#define RPYTHON_VMPROF\n'], + ) +global_eci = ExternalCompilationInfo(**eci_kwds) + + def setup(): if not detect_cpu.autodetect().startswith(detect_cpu.MODEL_X86_64): raise VMProfPlatformUnsupported("rvmprof only supports" " x86-64 CPUs for now") - - ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof') - SRC = ROOT.join('src') - - - if sys.platform.startswith('linux'): - libs = ['dl'] - else: - libs = [] - - eci_kwds = dict( - include_dirs = [SRC], - includes = ['rvmprof.h'], - libraries = libs, - separate_module_files = [SRC.join('rvmprof.c')], - post_include_bits=['#define 
RPYTHON_VMPROF\n'], - ) - eci = ExternalCompilationInfo(**eci_kwds) - platform.verify_eci(ExternalCompilationInfo( compile_extra=['-DRPYTHON_LL2CTYPES'], **eci_kwds)) + eci = global_eci vmprof_init = rffi.llexternal("vmprof_init", [rffi.INT, rffi.DOUBLE, rffi.CCHARP], rffi.CCHARP, compilation_info=eci) @@ -55,7 +55,8 @@ rffi.INT, compilation_info=eci) vmprof_ignore_signals = rffi.llexternal("vmprof_ignore_signals", [rffi.INT], lltype.Void, - compilation_info=eci) + compilation_info=eci, + _nowrapper=True) return CInterface(locals()) @@ -67,112 +68,34 @@ def _freeze_(self): return True -def token2lltype(tok): - if tok == 'i': - return lltype.Signed - if tok == 'r': - return llmemory.GCREF - raise NotImplementedError(repr(tok)) -def make_trampoline_function(name, func, token, restok): - from rpython.jit.backend import detect_cpu +# --- copy a few declarations from src/vmprof_stack.h --- - cont_name = 'rpyvmprof_f_%s_%s' % (name, token) - tramp_name = 'rpyvmprof_t_%s_%s' % (name, token) - orig_tramp_name = tramp_name +VMPROF_CODE_TAG = 1 - func.c_name = cont_name - func._dont_inline_ = True +VMPROFSTACK = lltype.ForwardReference() +PVMPROFSTACK = lltype.Ptr(VMPROFSTACK) +VMPROFSTACK.become(rffi.CStruct("vmprof_stack_s", + ('next', PVMPROFSTACK), + ('value', lltype.Signed), + ('kind', lltype.Signed))) +# ---------- - if sys.platform == 'darwin': - # according to internet "At the time UNIX was written in 1974...." - # "... 
all C functions are prefixed with _" - cont_name = '_' + cont_name - tramp_name = '_' + tramp_name - PLT = "" - size_decl = "" - type_decl = "" - extra_align = "" - else: - PLT = "@PLT" - type_decl = "\t.type\t%s, @function" % (tramp_name,) - size_decl = "\t.size\t%s, .-%s" % ( - tramp_name, tramp_name) - extra_align = "\t.cfi_def_cfa_offset 8" - assert detect_cpu.autodetect().startswith(detect_cpu.MODEL_X86_64), ( - "rvmprof only supports x86-64 CPUs for now") +vmprof_tl_stack = rthread.ThreadLocalField(PVMPROFSTACK, "vmprof_tl_stack") +do_use_eci = rffi.llexternal_use_eci( + ExternalCompilationInfo(includes=['vmprof_stack.h'], + include_dirs = [SRC])) - # mapping of argument count (not counting the final uid argument) to - # the register that holds this uid argument - reg = {0: '%rdi', - 1: '%rsi', - 2: '%rdx', - 3: '%rcx', - 4: '%r8', - 5: '%r9', - } - try: - reg = reg[len(token)] - except KeyError: - raise NotImplementedError( - "not supported: %r takes more than 5 arguments" % (func,)) +def enter_code(unique_id): + do_use_eci() + s = lltype.malloc(VMPROFSTACK, flavor='raw') + s.c_next = vmprof_tl_stack.get_or_make_raw() + s.c_value = unique_id + s.c_kind = VMPROF_CODE_TAG + vmprof_tl_stack.setraw(s) + return s - target = udir.join('module_cache') - target.ensure(dir=1) - target = target.join('trampoline_%s_%s.vmprof.s' % (name, token)) - # NOTE! the tabs in this file are absolutely essential, things - # that don't start with \t are silently ignored (<arigato>: WAT!?) 
- target.write("""\ -\t.text -\t.globl\t%(tramp_name)s -%(type_decl)s -%(tramp_name)s: -\t.cfi_startproc -\tpushq\t%(reg)s -\t.cfi_def_cfa_offset 16 -\tcall %(cont_name)s%(PLT)s -\taddq\t$8, %%rsp -%(extra_align)s -\tret -\t.cfi_endproc -%(size_decl)s -""" % locals()) - - def tok2cname(tok): - if tok == 'i': - return 'long' - if tok == 'r': - return 'void *' - raise NotImplementedError(repr(tok)) - - header = 'RPY_EXTERN %s %s(%s);\n' % ( - tok2cname(restok), - orig_tramp_name, - ', '.join([tok2cname(tok) for tok in token] + ['long'])) - - header += """\ -static int cmp_%s(void *addr) { - if (addr == %s) return 1; -#ifdef VMPROF_ADDR_OF_TRAMPOLINE - return VMPROF_ADDR_OF_TRAMPOLINE(addr); -#undef VMPROF_ADDR_OF_TRAMPOLINE -#else - return 0; -#endif -#define VMPROF_ADDR_OF_TRAMPOLINE cmp_%s -} -""" % (tramp_name, orig_tramp_name, tramp_name) - - eci = ExternalCompilationInfo( - post_include_bits = [header], - separate_module_files = [str(target)], - ) - - return rffi.llexternal( - orig_tramp_name, - [token2lltype(tok) for tok in token] + [lltype.Signed], - token2lltype(restok), - compilation_info=eci, - _nowrapper=True, sandboxsafe=True, - random_effects_on_gcobjs=True) +def leave_code(s): + vmprof_tl_stack.setraw(s.c_next) + lltype.free(s, flavor='raw') diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -4,12 +4,19 @@ from rpython.rlib.rvmprof import cintf from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance -from rpython.rtyper.lltypesystem import rffi +from rpython.rtyper.lltypesystem import rffi, llmemory +from rpython.rtyper.lltypesystem.lloperation import llop MAX_FUNC_NAME = 1023 # ____________________________________________________________ +# keep in sync with vmprof_stack.h +VMPROF_CODE_TAG = 1 +VMPROF_BLACKHOLE_TAG = 2 +VMPROF_JITTED_TAG = 3 +VMPROF_JITTING_TAG = 4 
+VMPROF_GC_TAG = 5 class VMProfError(Exception): def __init__(self, msg): @@ -19,17 +26,16 @@ class VMProf(object): + _immutable_fields_ = ['is_enabled?'] + def __init__(self): "NOT_RPYTHON: use _get_vmprof()" self._code_classes = set() self._gather_all_code_objs = lambda: None self._cleanup_() - if sys.maxint == 2147483647: - self._code_unique_id = 0 # XXX this is wrong, it won't work on 32bit - else: - self._code_unique_id = 0x7000000000000000 + self._code_unique_id = 4 self.cintf = cintf.setup() - + def _cleanup_(self): self.is_enabled = False @@ -127,7 +133,6 @@ if self.cintf.vmprof_register_virtual_function(name, uid, 500000) < 0: raise VMProfError("vmprof buffers full! disk full or too slow") - def vmprof_execute_code(name, get_code_fn, result_class=None): """Decorator to be used on the function that interprets a code object. @@ -136,12 +141,7 @@ 'get_code_fn(*args)' is called to extract the code object from the arguments given to the decorated function. - The original function can return None, an integer, or an instance. - In the latter case (only), 'result_class' must be set. - - NOTE: for now, this assumes that the decorated functions only takes - instances or plain integer arguments, and at most 5 of them - (including 'self' if applicable). + 'result_class' is ignored (backward compatibility). 
""" def decorate(func): try: @@ -149,52 +149,19 @@ except cintf.VMProfPlatformUnsupported: return func - if hasattr(func, 'im_self'): - assert func.im_self is None - func = func.im_func - - def lower(*args): - if len(args) == 0: - return (), "" - ll_args, token = lower(*args[1:]) - ll_arg = args[0] - if isinstance(ll_arg, int): - tok = "i" - else: - tok = "r" - ll_arg = cast_instance_to_gcref(ll_arg) - return (ll_arg,) + ll_args, tok + token - - @specialize.memo() - def get_ll_trampoline(token): - if result_class is None: - restok = "i" - else: - restok = "r" - return cintf.make_trampoline_function(name, func, token, restok) - def decorated_function(*args): - # go through the asm trampoline ONLY if we are translated but not - # being JITted. - # - # If we are not translated, we obviously don't want to go through - # the trampoline because there is no C function it can call. - # # If we are being JITted, we want to skip the trampoline, else the # JIT cannot see through it. - # - if we_are_translated() and not jit.we_are_jitted(): - # if we are translated, call the trampoline + if not jit.we_are_jitted(): unique_id = get_code_fn(*args)._vmprof_unique_id - ll_args, token = lower(*args) - ll_trampoline = get_ll_trampoline(token) - ll_result = ll_trampoline(*ll_args + (unique_id,)) - if result_class is not None: - return cast_base_ptr_to_instance(result_class, ll_result) - else: - return ll_result + x = cintf.enter_code(unique_id) + try: + return func(*args) + finally: + cintf.leave_code(x) else: return func(*args) + decorated_function.__name__ = func.__name__ + '_rvmprof' return decorated_function diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c --- a/rpython/rlib/rvmprof/src/rvmprof.c +++ b/rpython/rlib/rvmprof/src/rvmprof.c @@ -12,10 +12,12 @@ #else # include "common_header.h" +# include "structdef.h" +# include "src/threadlocal.h" # include "rvmprof.h" -# ifndef VMPROF_ADDR_OF_TRAMPOLINE +/*# ifndef VMPROF_ADDR_OF_TRAMPOLINE # 
error "RPython program using rvmprof, but not calling vmprof_execute_code()" -# endif +# endif*/ #endif diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h --- a/rpython/rlib/rvmprof/src/rvmprof.h +++ b/rpython/rlib/rvmprof/src/rvmprof.h @@ -4,3 +4,7 @@ RPY_EXTERN int vmprof_enable(void); RPY_EXTERN int vmprof_disable(void); RPY_EXTERN int vmprof_register_virtual_function(char *, long, int); +RPY_EXTERN void* vmprof_stack_new(void); +RPY_EXTERN int vmprof_stack_append(void*, long); +RPY_EXTERN long vmprof_stack_pop(void*); +RPY_EXTERN void vmprof_stack_free(void*); diff --git a/rpython/rlib/rvmprof/src/vmprof_common.h b/rpython/rlib/rvmprof/src/vmprof_common.h new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/vmprof_common.h @@ -0,0 +1,72 @@ +#include <stddef.h> + +#define MAX_FUNC_NAME 1024 + +static int profile_file = -1; +static long prepare_interval_usec = 0; +static long profile_interval_usec = 0; +static int opened_profile(char *interp_name); + +#define MAX_STACK_DEPTH \ + ((SINGLE_BUF_SIZE - sizeof(struct prof_stacktrace_s)) / sizeof(void *)) + +#define MARKER_STACKTRACE '\x01' +#define MARKER_VIRTUAL_IP '\x02' +#define MARKER_TRAILER '\x03' +#define MARKER_INTERP_NAME '\x04' /* deprecated */ +#define MARKER_HEADER '\x05' + +#define VERSION_BASE '\x00' +#define VERSION_THREAD_ID '\x01' +#define VERSION_TAG '\x02' + +typedef struct prof_stacktrace_s { + char padding[sizeof(long) - 1]; + char marker; + long count, depth; + void *stack[]; +} prof_stacktrace_s; + + +RPY_EXTERN +char *vmprof_init(int fd, double interval, char *interp_name) +{ + if (interval < 1e-6 || interval >= 1.0) + return "bad value for 'interval'"; + prepare_interval_usec = (int)(interval * 1000000.0); + + if (prepare_concurrent_bufs() < 0) + return "out of memory"; + + assert(fd >= 0); + profile_file = fd; + if (opened_profile(interp_name) < 0) { + profile_file = -1; + return strerror(errno); + } + return NULL; +} + +static int 
_write_all(const char *buf, size_t bufsize); + +static int opened_profile(char *interp_name) +{ + struct { + long hdr[5]; + char interp_name[259]; + } header; + + size_t namelen = strnlen(interp_name, 255); + + header.hdr[0] = 0; + header.hdr[1] = 3; + header.hdr[2] = 0; + header.hdr[3] = prepare_interval_usec; + header.hdr[4] = 0; + header.interp_name[0] = MARKER_HEADER; + header.interp_name[1] = '\x00'; + header.interp_name[2] = VERSION_TAG; + header.interp_name[3] = namelen; + memcpy(&header.interp_name[4], interp_name, namelen); + return _write_all((char*)&header, 5 * sizeof(long) + 4 + namelen); +} diff --git a/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h b/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h --- a/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h +++ b/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h @@ -1,119 +1,49 @@ -#ifdef PYPY_JIT_CODEMAP void *pypy_find_codemap_at_addr(long addr, long *start_addr); long pypy_yield_codemap_at_addr(void *codemap_raw, long addr, long *current_pos_addr); long pypy_jit_stack_depth_at_loc(long loc); -#endif -#ifdef CPYTHON_GET_CUSTOM_OFFSET -static void *tramp_start, *tramp_end; -#endif - - -static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, void *cp) { - -#if defined(PYPY_JIT_CODEMAP) - - intptr_t ip_l = (intptr_t)ip; - return pypy_jit_stack_depth_at_loc(ip_l); - -#elif defined(CPYTHON_GET_CUSTOM_OFFSET) - - if (ip >= tramp_start && ip <= tramp_end) { - // XXX the return value is wrong for all the places before push and - // after pop, fix - void *bp; - void *sp; - - /* This is a stage2 trampoline created by hotpatch: - - push %rbx - push %rbp - mov %rsp,%rbp - and $0xfffffffffffffff0,%rsp // make sure the stack is aligned - movabs $0x7ffff687bb10,%rbx - callq *%rbx - leaveq - pop %rbx - retq - - the stack layout is like this: - - +-----------+ high addresses - | ret addr | - +-----------+ - | saved rbx | start of the function frame - +-----------+ - | saved rbp | - +-----------+ - | ........ 
| <-- rbp - +-----------+ low addresses - - So, the trampoline frame starts at rbp+16, and the return address, - is at rbp+24. The vmprof API requires us to return the offset of - the frame relative to sp, hence we have this weird computation. - - XXX (antocuni): I think we could change the API to return directly - the frame address instead of the offset; however, this require a - change in the PyPy code too - */ - - unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp); - unw_get_reg (cp, UNW_X86_64_RBP, (unw_word_t*)&bp); - return bp+16+8-sp; - } - return -1; - -#else - - return -1; - -#endif -} - -static long vmprof_write_header_for_jit_addr(void **result, long n, - void *ip, int max_depth) +static long vmprof_write_header_for_jit_addr(intptr_t *result, long n, + intptr_t ip, int max_depth) { #ifdef PYPY_JIT_CODEMAP void *codemap; long current_pos = 0; - intptr_t id; + intptr_t ident; long start_addr = 0; intptr_t addr = (intptr_t)ip; int start, k; - void *tmp; + intptr_t tmp; codemap = pypy_find_codemap_at_addr(addr, &start_addr); - if (codemap == NULL) - // not a jit code at all + if (codemap == NULL || n >= max_depth - 2) + // not a jit code at all or almost max depth return n; // modify the last entry to point to start address and not the random one // in the middle - result[n - 1] = (void*)start_addr; - result[n] = (void*)2; - n++; + result[n++] = VMPROF_ASSEMBLER_TAG; + result[n++] = start_addr; start = n; while (n < max_depth) { - id = pypy_yield_codemap_at_addr(codemap, addr, &current_pos); - if (id == -1) + ident = pypy_yield_codemap_at_addr(codemap, addr, &current_pos); + if (ident == -1) // finish break; - if (id == 0) + if (ident == 0) continue; // not main codemap - result[n++] = (void *)id; + result[n++] = VMPROF_JITTED_TAG; + result[n++] = ident; } - k = 0; + k = 1; + while (k < (n - start) / 2) { tmp = result[start + k]; - result[start + k] = result[n - k - 1]; - result[n - k - 1] = tmp; - k++; - } - if (n < max_depth) { - result[n++] = (void*)3; + result[start + 
k] = result[n - k]; + result[n - k] = tmp; + k += 2; } #endif return n; diff --git a/rpython/rlib/rvmprof/src/vmprof_getpc.h b/rpython/rlib/rvmprof/src/vmprof_getpc.h --- a/rpython/rlib/rvmprof/src/vmprof_getpc.h +++ b/rpython/rlib/rvmprof/src/vmprof_getpc.h @@ -134,7 +134,7 @@ } }; -void* GetPC(ucontext_t *signal_ucontext) { +intptr_t GetPC(ucontext_t *signal_ucontext) { // See comment above struct CallUnrollInfo. Only try instruction // flow matching if both eip and esp looks reasonable. const int eip = signal_ucontext->uc_mcontext.gregs[REG_EIP]; @@ -146,12 +146,12 @@ if (!memcmp(eip_char + callunrollinfo[i].pc_offset, callunrollinfo[i].ins, callunrollinfo[i].ins_size)) { // We have a match. - void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset); + intptr_t *retaddr = (intptr_t*)(esp + callunrollinfo[i].return_sp_offset); return *retaddr; } } } - return (void*)eip; + return eip; } // Special case #2: Windows, which has to do something totally different. @@ -170,7 +170,7 @@ typedef int ucontext_t; #endif -void* GetPC(ucontext_t *signal_ucontext) { +intptr_t GetPC(ucontext_t *signal_ucontext) { RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n"); return NULL; } @@ -180,11 +180,11 @@ // the right value for your system, and add it to the list in // configure.ac (or set it manually in your config.h). 
#else -void* GetPC(ucontext_t *signal_ucontext) { +intptr_t GetPC(ucontext_t *signal_ucontext) { #ifdef __APPLE__ - return (void*)(signal_ucontext->uc_mcontext->__ss.__rip); + return (signal_ucontext->uc_mcontext->__ss.__rip); #else - return (void*)signal_ucontext->PC_FROM_UCONTEXT; // defined in config.h + return signal_ucontext->PC_FROM_UCONTEXT; // defined in config.h #endif } diff --git a/rpython/rlib/rvmprof/src/vmprof_main.h b/rpython/rlib/rvmprof/src/vmprof_main.h --- a/rpython/rlib/rvmprof/src/vmprof_main.h +++ b/rpython/rlib/rvmprof/src/vmprof_main.h @@ -25,84 +25,28 @@ #include <sys/time.h> #include <errno.h> #include <unistd.h> +#include <stddef.h> #include <stdio.h> #include <sys/types.h> #include <signal.h> #include <sys/stat.h> +#include <unistd.h> #include <fcntl.h> #include "vmprof_getpc.h" -#ifdef __APPLE__ -#include "libunwind.h" -#else -#include "vmprof_unwind.h" -#endif #include "vmprof_mt.h" - +#include "vmprof_stack.h" +#include "vmprof_common.h" /************************************************************/ -// functions copied from libunwind using dlopen - -#ifndef __APPLE__ // should be linux only probably -static int (*unw_get_reg)(unw_cursor_t*, int, unw_word_t*) = NULL; -static int (*unw_step)(unw_cursor_t*) = NULL; -static int (*unw_init_local)(unw_cursor_t *, unw_context_t *) = NULL; -static int (*unw_get_proc_info)(unw_cursor_t *, unw_proc_info_t *) = NULL; -#endif - -static int profile_file = -1; static long prepare_interval_usec; +static long saved_profile_file; static struct profbuf_s *volatile current_codes; static void *(*mainloop_get_virtual_ip)(char *) = 0; static int opened_profile(char *interp_name); static void flush_codes(void); -#ifdef __APPLE__ -#define UNWIND_NAME "/usr/lib/system/libunwind.dylib" -#define UNW_PREFIX "unw" -#else -#define UNWIND_NAME "libunwind.so" -#define UNW_PREFIX "_ULx86_64" -#endif - -RPY_EXTERN -char *vmprof_init(int fd, double interval, char *interp_name) -{ - if (interval < 1e-6 || interval >= 
1.0) - return "bad value for 'interval'"; - prepare_interval_usec = (int)(interval * 1000000.0); - -#ifndef __APPLE__ - if (!unw_get_reg) { - void *libhandle; - - if (!(libhandle = dlopen(UNWIND_NAME, RTLD_LAZY | RTLD_LOCAL))) - goto error; - if (!(unw_get_reg = dlsym(libhandle, UNW_PREFIX "_get_reg"))) - goto error; - if (!(unw_get_proc_info = dlsym(libhandle, UNW_PREFIX "_get_proc_info"))) - goto error; - if (!(unw_init_local = dlsym(libhandle, UNW_PREFIX "_init_local"))) - goto error; - if (!(unw_step = dlsym(libhandle, UNW_PREFIX "_step"))) - goto error; - } -#endif - if (prepare_concurrent_bufs() < 0) - return "out of memory"; - - assert(fd >= 0); - profile_file = fd; - if (opened_profile(interp_name) < 0) { - profile_file = -1; - return strerror(errno); - } - return NULL; - - error: - return dlerror(); -} /************************************************************/ @@ -131,131 +75,62 @@ * ************************************************************* */ -#define MAX_FUNC_NAME 128 -#define MAX_STACK_DEPTH \ - ((SINGLE_BUF_SIZE - sizeof(struct prof_stacktrace_s)) / sizeof(void *)) - -#define MARKER_STACKTRACE '\x01' -#define MARKER_VIRTUAL_IP '\x02' -#define MARKER_TRAILER '\x03' -#define MARKER_INTERP_NAME '\x04' /* deprecated */ -#define MARKER_HEADER '\x05' - -#define VERSION_BASE '\x00' -#define VERSION_THREAD_ID '\x01' - -struct prof_stacktrace_s { - char padding[sizeof(long) - 1]; - char marker; - long count, depth; - void *stack[]; -}; - -static long profile_interval_usec = 0; static char atfork_hook_installed = 0; -/* ****************************************************** - * libunwind workaround for process JIT frames correctly - * ****************************************************** - */ - #include "vmprof_get_custom_offset.h" -typedef struct { - void* _unused1; - void* _unused2; - void* sp; - void* ip; - void* _unused3[sizeof(unw_cursor_t)/sizeof(void*) - 4]; -} vmprof_hacked_unw_cursor_t; - -static int vmprof_unw_step(unw_cursor_t *cp, int 
first_run) -{ - void* ip; - void* sp; - ptrdiff_t sp_offset; - unw_get_reg (cp, UNW_REG_IP, (unw_word_t*)&ip); - unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp); - if (!first_run) { - // make sure we're pointing to the CALL and not to the first - // instruction after. If the callee adjusts the stack for us - // it's not safe to be at the instruction after - ip -= 1; - } - sp_offset = vmprof_unw_get_custom_offset(ip, cp); - - if (sp_offset == -1) { - // it means that the ip is NOT in JITted code, so we can use the - // stardard unw_step - return unw_step(cp); - } - else { - // this is a horrible hack to manually walk the stack frame, by - // setting the IP and SP in the cursor - vmprof_hacked_unw_cursor_t *cp2 = (vmprof_hacked_unw_cursor_t*)cp; - void* bp = (void*)sp + sp_offset; - cp2->sp = bp; - bp -= sizeof(void*); - cp2->ip = ((void**)bp)[0]; - // the ret is on the top of the stack minus WORD - return 1; - } -} - - /* ************************************************************* * functions to dump the stack trace * ************************************************************* */ -static int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext) + +#ifndef RPYTHON_LL2CTYPES +static vmprof_stack_t *get_vmprof_stack(void) { - void *ip; - int n = 0; - unw_cursor_t cursor; -#ifdef __APPLE__ - unw_context_t uc; - unw_getcontext(&uc); + return RPY_THREADLOCALREF_GET(vmprof_tl_stack); +} #else - unw_context_t uc = *ucontext; +static vmprof_stack_t *get_vmprof_stack(void) +{ + return 0; +} #endif - int ret = unw_init_local(&cursor, &uc); - assert(ret >= 0); - (void)ret; - - while (n < max_depth) { - if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { - break; +static int get_stack_trace(intptr_t *result, int max_depth, intptr_t pc, ucontext_t *ucontext) +{ + vmprof_stack_t* stack = get_vmprof_stack(); + int n = 0; + intptr_t addr = 0; + int bottom_jitted = 0; + // check if the pc is in JIT +#ifdef PYPY_JIT_CODEMAP + if 
(pypy_find_codemap_at_addr((intptr_t)pc, &addr)) { + // the bottom part is jitted, means we can fill up the first part + // from the JIT + n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth); + stack = stack->next; // skip the first item as it contains garbage + } +#endif + while (n < max_depth - 1 && stack) { + if (stack->kind == VMPROF_CODE_TAG) { + result[n] = stack->kind; + result[n + 1] = stack->value; + n += 2; } - - unw_proc_info_t pip; - unw_get_proc_info(&cursor, &pip); - - /* if n==0, it means that the signal handler interrupted us while we - were in the trampoline, so we are not executing (yet) the real main - loop function; just skip it */ - if (VMPROF_ADDR_OF_TRAMPOLINE((void*)pip.start_ip) && n > 0) { - // found main loop stack frame - void* sp; - unw_get_reg(&cursor, UNW_REG_SP, (unw_word_t *) &sp); - if (mainloop_get_virtual_ip) - ip = mainloop_get_virtual_ip((char *)sp); - else - ip = *(void **)sp; +#ifdef PYPY_JIT_CODEMAP + else if (stack->kind == VMPROF_JITTED_TAG) { + pc = ((intptr_t*)(stack->value - sizeof(intptr_t)))[0]; + n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth); } - - int first_run = (n == 0); - result[n++] = ip; - n = vmprof_write_header_for_jit_addr(result, n, ip, max_depth); - if (vmprof_unw_step(&cursor, first_run) <= 0) - break; +#endif + stack = stack->next; } return n; } -static void *get_current_thread_id(void) +static intptr_t get_current_thread_id(void) { /* xxx This function is a hack on two fronts: @@ -269,7 +144,7 @@ An alternative would be to try to look if the information is available in the ucontext_t in the caller. 
*/ - return (void *)pthread_self(); + return (intptr_t)pthread_self(); } @@ -278,8 +153,43 @@ * ************************************************************* */ +#include <setjmp.h> + +volatile int spinlock; +jmp_buf restore_point; + +static void segfault_handler(int arg) +{ + longjmp(restore_point, SIGSEGV); +} + static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) { +#ifdef __APPLE__ + // TERRIBLE HACK AHEAD + // on OS X, the thread local storage is sometimes uninitialized + // when the signal handler runs - it means it's impossible to read errno + // or call any syscall or read PyThread_Current or pthread_self. Additionally, + // it seems impossible to read the register gs. + // here we register segfault handler (all guarded by a spinlock) and call + // longjmp in case segfault happens while reading a thread local + while (__sync_lock_test_and_set(&spinlock, 1)) { + } + signal(SIGSEGV, &segfault_handler); + int fault_code = setjmp(restore_point); + if (fault_code == 0) { + pthread_self(); + get_current_thread_id(); + } else { + signal(SIGSEGV, SIG_DFL); + __sync_synchronize(); + spinlock = 0; + return; + } + signal(SIGSEGV, SIG_DFL); + __sync_synchronize(); + spinlock = 0; +#endif long val = __sync_fetch_and_add(&signal_handler_value, 2L); if ((val & 1) == 0) { @@ -296,9 +206,8 @@ struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data; st->marker = MARKER_STACKTRACE; st->count = 1; - st->stack[0] = GetPC((ucontext_t*)ucontext); - depth = get_stack_trace(st->stack+1, MAX_STACK_DEPTH-2, ucontext); - depth++; // To account for pc value in stack[0]; + depth = get_stack_trace(st->stack, + MAX_STACK_DEPTH-2, GetPC((ucontext_t*)ucontext), ucontext); st->depth = depth; st->stack[depth++] = get_current_thread_id(); p->data_offset = offsetof(struct prof_stacktrace_s, marker); @@ -363,12 +272,15 @@ static void atfork_disable_timer(void) { if (profile_interval_usec > 0) { + saved_profile_file = profile_file; + profile_file = -1; 
remove_sigprof_timer(); } } static void atfork_enable_timer(void) { if (profile_interval_usec > 0) { + profile_file = saved_profile_file; install_sigprof_timer(); } } @@ -415,7 +327,7 @@ return -1; } -static int _write_all(const void *buf, size_t bufsize) +static int _write_all(const char *buf, size_t bufsize) { while (bufsize > 0) { ssize_t count = write(profile_file, buf, bufsize); @@ -427,71 +339,13 @@ return 0; } -static int opened_profile(char *interp_name) -{ - struct { - long hdr[5]; - char interp_name[259]; - } header; - - size_t namelen = strnlen(interp_name, 255); - current_codes = NULL; - - header.hdr[0] = 0; - header.hdr[1] = 3; - header.hdr[2] = 0; - header.hdr[3] = prepare_interval_usec; - header.hdr[4] = 0; - header.interp_name[0] = MARKER_HEADER; - header.interp_name[1] = '\x00'; - header.interp_name[2] = VERSION_THREAD_ID; - header.interp_name[3] = namelen; - memcpy(&header.interp_name[4], interp_name, namelen); - return _write_all(&header, 5 * sizeof(long) + 4 + namelen); -} - static int close_profile(void) { - char buf[4096]; - ssize_t size; unsigned char marker = MARKER_TRAILER; if (_write_all(&marker, 1) < 0) return -1; -#ifdef __linux__ - // copy /proc/self/maps to the end of the profile file - int srcfd = open("/proc/self/maps", O_RDONLY); - if (srcfd < 0) - return -1; - - while ((size = read(srcfd, buf, sizeof buf)) > 0) { - if (_write_all(buf, size) < 0) { - close(srcfd); - return -1; - } - } - close(srcfd); -#else - // freebsd and mac -#if defined(__APPLE__) - sprintf(buf, "vmmap %d", getpid()); -#else - sprintf(buf, "procstat -v %d", getpid()); -#endif - FILE *srcf = popen(buf, "r"); - if (!srcf) - return -1; - - while ((size = fread(buf, 1, sizeof buf, srcf))) { - if (_write_all(buf, size) < 0) { - pclose(srcf); - return -1; - } - } - pclose(srcf); -#endif - /* don't close() the file descriptor from here */ profile_file = -1; return 0; @@ -522,6 +376,9 @@ struct profbuf_s *p; char *t; + if (profile_file == -1) + return 0; // silently 
don't write it + retry: p = current_codes; if (p != NULL) { @@ -529,7 +386,7 @@ /* grabbed 'current_codes': we will append the current block to it if it contains enough room */ size_t freesize = SINGLE_BUF_SIZE - p->data_size; - if (freesize < blocklen) { + if (freesize < (size_t)blocklen) { /* full: flush it */ commit_buffer(profile_file, p); p = NULL; diff --git a/rpython/rlib/rvmprof/src/vmprof_stack.h b/rpython/rlib/rvmprof/src/vmprof_stack.h new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/vmprof_stack.h @@ -0,0 +1,25 @@ +#ifndef _VMPROF_STACK_H_ +#define _VMPROF_STACK_H_ + +#include <unistd.h> + +#define VMPROF_CODE_TAG 1 /* <- also in cintf.py */ +#define VMPROF_BLACKHOLE_TAG 2 +#define VMPROF_JITTED_TAG 3 +#define VMPROF_JITTING_TAG 4 +#define VMPROF_GC_TAG 5 +#define VMPROF_ASSEMBLER_TAG 6 +// whatever we want here + +typedef struct vmprof_stack_s { + struct vmprof_stack_s* next; + intptr_t value; + intptr_t kind; +} vmprof_stack_t; + +// the kind is WORD so we consume exactly 3 WORDs and we don't have +// to worry too much. There is a potential for squeezing it with bit +// patterns into one WORD, but I don't want to care RIGHT NOW, potential +// for future optimization potential + +#endif diff --git a/rpython/rlib/rvmprof/test/test_ztranslation.py b/rpython/rlib/rvmprof/test/test_ztranslation.py --- a/rpython/rlib/rvmprof/test/test_ztranslation.py +++ b/rpython/rlib/rvmprof/test/test_ztranslation.py @@ -64,8 +64,14 @@ def test_interpreted(): # takes forever if the Python process is already big... 
import subprocess - subprocess.check_call([sys.executable, os.path.basename(__file__)], - cwd=(os.path.dirname(__file__) or '.')) + me = os.path.basename(__file__) + if me.endswith('pyc') or me.endswith('pyo'): + me = me[:-1] + env = os.environ.copy() + env['PYTHONPATH'] = '' + subprocess.check_call([sys.executable, me], + cwd=(os.path.dirname(__file__) or '.'), + env=env) def test_compiled(): fn = compile(main, [], gcpolicy="minimark") _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit