Author: fijal
Branch:
Changeset: r82078:cc28605e84eb
Date: 2016-02-04 20:44 +0100
http://bitbucket.org/pypy/pypy/changeset/cc28605e84eb/
Log: merge vmprof-newstack
diff --git a/pypy/module/_vmprof/__init__.py b/pypy/module/_vmprof/__init__.py
--- a/pypy/module/_vmprof/__init__.py
+++ b/pypy/module/_vmprof/__init__.py
@@ -11,6 +11,7 @@
interpleveldefs = {
'enable': 'interp_vmprof.enable',
'disable': 'interp_vmprof.disable',
+ 'write_all_code_objects': 'interp_vmprof.write_all_code_objects',
'VMProfError': 'space.fromcache(interp_vmprof.Cache).w_VMProfError',
}
diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py
--- a/pypy/module/_vmprof/interp_vmprof.py
+++ b/pypy/module/_vmprof/interp_vmprof.py
@@ -59,11 +59,21 @@
'interval' is a float representing the sampling interval, in seconds.
Must be smaller than 1.0
"""
+ w_modules = space.sys.get('modules')
+ if space.is_true(space.contains(w_modules, space.wrap('_continuation'))):
+ space.warn(space.wrap("Using _continuation/greenlet/stacklet together "
+ "with vmprof will crash"),
+ space.w_RuntimeWarning)
try:
rvmprof.enable(fileno, period)
except rvmprof.VMProfError, e:
raise VMProfError(space, e)
+def write_all_code_objects(space):
+ """ Needed on cpython, just empty function here
+ """
+ pass
+
def disable(space):
"""Disable vmprof. Remember to close the file descriptor afterwards
if necessary.
diff --git a/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py b/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py
@@ -0,0 +1,86 @@
+
+import os, py
+from rpython.jit.backend.test.support import CCompiledMixin
+from rpython.rlib.jit import JitDriver
+from rpython.tool.udir import udir
+from rpython.translator.translator import TranslationContext
+from rpython.jit.backend.detect_cpu import getcpuclass
+
+class CompiledVmprofTest(CCompiledMixin):
+ CPUClass = getcpuclass()
+
+ def _get_TranslationContext(self):
+ t = TranslationContext()
+ t.config.translation.gc = 'incminimark'
+ t.config.translation.list_comprehension_operations = True
+ return t
+
+ def test_vmprof(self):
+ from rpython.rlib import rvmprof
+
+ class MyCode:
+ _vmprof_unique_id = 0
+ def __init__(self, name):
+ self.name = name
+
+ def get_name(code):
+ return code.name
+
+ code2 = MyCode("py:y:foo:4")
+ rvmprof.register_code(code2, get_name)
+
+ try:
+ rvmprof.register_code_object_class(MyCode, get_name)
+ except rvmprof.VMProfPlatformUnsupported, e:
+ py.test.skip(str(e))
+
+ def get_unique_id(code):
+ return rvmprof.get_unique_id(code)
+
+ driver = JitDriver(greens = ['code'], reds = ['i', 's', 'num'],
+ is_recursive=True, get_unique_id=get_unique_id)
+
+ @rvmprof.vmprof_execute_code("xcode13", lambda code, num: code)
+ def main(code, num):
+ return main_jitted(code, num)
+
+ def main_jitted(code, num):
+ s = 0
+ i = 0
+ while i < num:
+ driver.jit_merge_point(code=code, i=i, s=s, num=num)
+ s += (i << 1)
+ if i % 3 == 0 and code is not code2:
+ main(code2, 100)
+ i += 1
+ return s
+
+ tmpfilename = str(udir.join('test_rvmprof'))
+
+ def f(num):
+ code = MyCode("py:x:foo:3")
+ rvmprof.register_code(code, get_name)
+ fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT, 0666)
+ period = 0.0001
+ rvmprof.enable(fd, period)
+ res = main(code, num)
+ #assert res == 499999500000
+ rvmprof.disable()
+ os.close(fd)
+ return 0
+
+ def check_vmprof_output():
+ from vmprof import read_profile
+ tmpfile = str(udir.join('test_rvmprof'))
+ stats = read_profile(tmpfile)
+ t = stats.get_tree()
+ assert t.name == 'py:x:foo:3'
+ assert len(t.children) == 1 # jit
+
+ self.meta_interp(f, [1000000], inline=True)
+ try:
+ import vmprof
+ except ImportError:
+ pass
+ else:
+ check_vmprof_output()
\ No newline at end of file
diff --git a/rpython/jit/backend/test/test_rvmprof.py b/rpython/jit/backend/test/test_rvmprof.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/test/test_rvmprof.py
@@ -0,0 +1,49 @@
+import py
+from rpython.rlib import jit
+from rpython.rtyper.annlowlevel import llhelper
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rlib.rvmprof import cintf
+from rpython.jit.backend.x86.arch import WORD
+from rpython.jit.codewriter.policy import JitPolicy
+
+class BaseRVMProfTest(object):
+ def test_one(self):
+ py.test.skip("needs thread-locals in the JIT, which is only available "
+ "after translation")
+ visited = []
+
+ def helper():
+ stack = cintf.vmprof_tl_stack.getraw()
+ if stack:
+ # not during tracing
+ visited.append(stack.c_value)
+ else:
+ visited.append(0)
+
+ llfn = llhelper(lltype.Ptr(lltype.FuncType([], lltype.Void)), helper)
+
+ driver = jit.JitDriver(greens=[], reds='auto')
+
+ def f(n):
+ i = 0
+ while i < n:
+ driver.jit_merge_point()
+ i += 1
+ llfn()
+
+ class Hooks(jit.JitHookInterface):
+ def after_compile(self, debug_info):
+ self.raw_start = debug_info.asminfo.rawstart
+
+ hooks = Hooks()
+
+ null = lltype.nullptr(cintf.VMPROFSTACK)
+ cintf.vmprof_tl_stack.setraw(null) # make it empty
+ self.meta_interp(f, [10], policy=JitPolicy(hooks))
+ v = set(visited)
+ assert 0 in v
+ v.remove(0)
+ assert len(v) == 1
+ assert 0 <= list(v)[0] - hooks.raw_start <= 10*1024
+ assert cintf.vmprof_tl_stack.getraw() == null
+ # ^^^ make sure we didn't leave anything dangling
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -31,7 +31,7 @@
if WORD == 4:
# ebp + ebx + esi + edi + 15 extra words = 19 words
- FRAME_FIXED_SIZE = 19
+ FRAME_FIXED_SIZE = 19 + 4 # 4 for vmprof, XXX make more compact!
PASS_ON_MY_FRAME = 15
JITFRAME_FIXED_SIZE = 6 + 8 * 2 # 6 GPR + 8 XMM * 2 WORDS/float
# 'threadlocal_addr' is passed as 2nd argument on the stack,
@@ -41,7 +41,7 @@
THREADLOCAL_OFS = (FRAME_FIXED_SIZE + 2) * WORD
else:
# rbp + rbx + r12 + r13 + r14 + r15 + threadlocal + 12 extra words = 19
- FRAME_FIXED_SIZE = 19
+ FRAME_FIXED_SIZE = 19 + 4 # 4 for vmprof, XXX make more compact!
PASS_ON_MY_FRAME = 12
JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM
# 'threadlocal_addr' is passed as 2nd argument in %esi,
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -12,7 +12,7 @@
from rpython.jit.metainterp.compile import ResumeGuardDescr
from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
+from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.rtyper import rclass
from rpython.rlib.jit import AsmInfo
from rpython.jit.backend.model import CompiledLoopToken
@@ -837,11 +837,56 @@
frame_depth = max(frame_depth, target_frame_depth)
return frame_depth
+ def _call_header_vmprof(self):
+ from rpython.rlib.rvmprof.rvmprof import cintf, VMPROF_JITTED_TAG
+
+ # tloc = address of pypy_threadlocal_s
+ if IS_X86_32:
+ # Can't use esi here, its old value is not saved yet.
+ # But we can use eax and ecx.
+ self.mc.MOV_rs(edx.value, THREADLOCAL_OFS)
+ tloc = edx
+ old = ecx
+ else:
+ # The thread-local value is already in esi.
+ # We should avoid if possible to use ecx or edx because they
+ # would be used to pass arguments #3 and #4 (even though, so
+ # far, the assembler only receives two arguments).
+ tloc = esi
+ old = r11
+ # eax = address in the stack of a 3-words struct vmprof_stack_s
+ self.mc.LEA_rs(eax.value, (FRAME_FIXED_SIZE - 4) * WORD)
+ # old = current value of vmprof_tl_stack
+ offset = cintf.vmprof_tl_stack.getoffset()
+ self.mc.MOV_rm(old.value, (tloc.value, offset))
+ # eax->next = old
+ self.mc.MOV_mr((eax.value, 0), old.value)
+ # eax->value = my esp
+ self.mc.MOV_mr((eax.value, WORD), esp.value)
+ # eax->kind = VMPROF_JITTED_TAG
+ self.mc.MOV_mi((eax.value, WORD * 2), VMPROF_JITTED_TAG)
+ # save in vmprof_tl_stack the new eax
+ self.mc.MOV_mr((tloc.value, offset), eax.value)
+
+ def _call_footer_vmprof(self):
+ from rpython.rlib.rvmprof.rvmprof import cintf
+ # edx = address of pypy_threadlocal_s
+ self.mc.MOV_rs(edx.value, THREADLOCAL_OFS)
+ self.mc.AND_ri(edx.value, ~1)
+ # eax = (our local vmprof_tl_stack).next
+ self.mc.MOV_rs(eax.value, (FRAME_FIXED_SIZE - 4 + 0) * WORD)
+ # save in vmprof_tl_stack the value eax
+ offset = cintf.vmprof_tl_stack.getoffset()
+ self.mc.MOV_mr((edx.value, offset), eax.value)
+
def _call_header(self):
self.mc.SUB_ri(esp.value, FRAME_FIXED_SIZE * WORD)
self.mc.MOV_sr(PASS_ON_MY_FRAME * WORD, ebp.value)
if IS_X86_64:
self.mc.MOV_sr(THREADLOCAL_OFS, esi.value)
+ if self.cpu.translate_support_code:
+ self._call_header_vmprof() # on X86_64, this uses esi
+ if IS_X86_64:
self.mc.MOV_rr(ebp.value, edi.value)
else:
self.mc.MOV_rs(ebp.value, (FRAME_FIXED_SIZE + 1) * WORD)
@@ -873,6 +918,8 @@
def _call_footer(self):
# the return value is the jitframe
+ if self.cpu.translate_support_code:
+ self._call_footer_vmprof()
self.mc.MOV_rr(eax.value, ebp.value)
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
diff --git a/rpython/jit/backend/x86/test/test_rvmprof.py b/rpython/jit/backend/x86/test/test_rvmprof.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/x86/test/test_rvmprof.py
@@ -0,0 +1,7 @@
+
+import py
+from rpython.jit.backend.test.test_rvmprof import BaseRVMProfTest
+from rpython.jit.backend.x86.test.test_basic import Jit386Mixin
+
+class TestFfiCall(Jit386Mixin, BaseRVMProfTest):
+ pass
\ No newline at end of file
diff --git a/rpython/jit/backend/x86/test/test_zrpy_vmprof.py b/rpython/jit/backend/x86/test/test_zrpy_vmprof.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/x86/test/test_zrpy_vmprof.py
@@ -0,0 +1,7 @@
+
+from rpython.jit.backend.llsupport.test.zrpy_vmprof_test import CompiledVmprofTest
+
+class TestZVMprof(CompiledVmprofTest):
+
+ gcrootfinder = "shadowstack"
+ gc = "incminimark"
\ No newline at end of file
diff --git a/rpython/jit/backend/x86/test/test_zvmprof.py b/rpython/jit/backend/x86/test/test_zvmprof.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/x86/test/test_zvmprof.py
@@ -0,0 +1,7 @@
+
+from rpython.jit.backend.llsupport.test.zrpy_vmprof_test import CompiledVmprofTest
+
+class TestZVMprof(CompiledVmprofTest):
+
+ gcrootfinder = "shadowstack"
+ gc = "incminimark"
\ No newline at end of file
diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py
--- a/rpython/jit/codewriter/test/test_jtransform.py
+++ b/rpython/jit/codewriter/test/test_jtransform.py
@@ -1332,7 +1332,7 @@
tlfield = ThreadLocalField(lltype.Signed, 'foobar_test_',
loop_invariant=loop_inv)
OS_THREADLOCALREF_GET = effectinfo.EffectInfo.OS_THREADLOCALREF_GET
- c = const(tlfield.offset)
+ c = const(tlfield.getoffset())
v = varoftype(lltype.Signed)
op = SpaceOperation('threadlocalref_get', [c], v)
cc = FakeBuiltinCallControl()
diff --git a/rpython/jit/metainterp/quasiimmut.py b/rpython/jit/metainterp/quasiimmut.py
--- a/rpython/jit/metainterp/quasiimmut.py
+++ b/rpython/jit/metainterp/quasiimmut.py
@@ -51,6 +51,7 @@
class QuasiImmut(object):
llopaque = True
compress_limit = 30
+ looptokens_wrefs = None
def __init__(self, cpu):
self.cpu = cpu
@@ -75,7 +76,7 @@
def compress_looptokens_list(self):
self.looptokens_wrefs = [wref for wref in self.looptokens_wrefs
if wref() is not None]
- # NB. we must keep around the looptoken_wrefs that are
+ # NB. we must keep around the looptokens_wrefs that are
# already invalidated; see below
self.compress_limit = (len(self.looptokens_wrefs) + 15) * 2
@@ -83,6 +84,9 @@
# When this is called, all the loops that we record become
# invalid: all GUARD_NOT_INVALIDATED in these loops (and
# in attached bridges) must now fail.
+ if self.looptokens_wrefs is None:
+ # can't happen, but helps compiled tests
+ return
wrefs = self.looptokens_wrefs
self.looptokens_wrefs = []
for wref in wrefs:
diff --git a/rpython/jit/metainterp/test/test_jitdriver.py b/rpython/jit/metainterp/test/test_jitdriver.py
--- a/rpython/jit/metainterp/test/test_jitdriver.py
+++ b/rpython/jit/metainterp/test/test_jitdriver.py
@@ -193,7 +193,7 @@
return pc + 1
driver = JitDriver(greens=["pc"], reds='auto',
- get_unique_id=get_unique_id)
+ get_unique_id=get_unique_id, is_recursive=True)
def f(arg):
i = 0
diff --git a/rpython/jit/metainterp/test/test_recursive.py b/rpython/jit/metainterp/test/test_recursive.py
--- a/rpython/jit/metainterp/test/test_recursive.py
+++ b/rpython/jit/metainterp/test/test_recursive.py
@@ -1312,7 +1312,7 @@
return (code + 1) * 2
driver = JitDriver(greens=["pc", "code"], reds='auto',
- get_unique_id=get_unique_id)
+ get_unique_id=get_unique_id, is_recursive=True)
def f(pc, code):
i = 0
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -623,6 +623,8 @@
raise AttributeError("no 'greens' or 'reds' supplied")
if virtualizables is not None:
self.virtualizables = virtualizables
+ if get_unique_id is not None:
+ assert is_recursive, "get_unique_id and is_recursive must be specified at the same time"
for v in self.virtualizables:
assert v in self.reds
# if reds are automatic, they won't be passed to jit_merge_point, so
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py
--- a/rpython/rlib/rthread.py
+++ b/rpython/rlib/rthread.py
@@ -308,7 +308,7 @@
offset = CDefinedIntSymbolic('RPY_TLOFS_%s' % self.fieldname,
default='?')
offset.loop_invariant = loop_invariant
- self.offset = offset
+ self._offset = offset
def getraw():
if we_are_translated():
@@ -364,7 +364,7 @@
ThreadLocalField.__init__(self, lltype.Signed, 'tlref%d' % unique_id,
loop_invariant=loop_invariant)
setraw = self.setraw
- offset = self.offset
+ offset = self._offset
def get():
if we_are_translated():
diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py
--- a/rpython/rlib/rvmprof/cintf.py
+++ b/rpython/rlib/rvmprof/cintf.py
@@ -5,41 +5,41 @@
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
from rpython.translator.tool.cbuild import ExternalCompilationInfo
from rpython.rtyper.tool import rffi_platform as platform
+from rpython.rlib import rthread
from rpython.jit.backend import detect_cpu
class VMProfPlatformUnsupported(Exception):
pass
+ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof')
+SRC = ROOT.join('src')
+
+if sys.platform.startswith('linux'):
+ _libs = ['dl']
+else:
+ _libs = []
+eci_kwds = dict(
+ include_dirs = [SRC],
+ includes = ['rvmprof.h'],
+ libraries = _libs,
+ separate_module_files = [SRC.join('rvmprof.c')],
+ post_include_bits=['#define RPYTHON_VMPROF\n'],
+ )
+global_eci = ExternalCompilationInfo(**eci_kwds)
+
+
def setup():
if not detect_cpu.autodetect().startswith(detect_cpu.MODEL_X86_64):
raise VMProfPlatformUnsupported("rvmprof only supports"
" x86-64 CPUs for now")
-
- ROOT = py.path.local(rpythonroot).join('rpython', 'rlib', 'rvmprof')
- SRC = ROOT.join('src')
-
-
- if sys.platform.startswith('linux'):
- libs = ['dl']
- else:
- libs = []
-
- eci_kwds = dict(
- include_dirs = [SRC],
- includes = ['rvmprof.h'],
- libraries = libs,
- separate_module_files = [SRC.join('rvmprof.c')],
- post_include_bits=['#define RPYTHON_VMPROF\n'],
- )
- eci = ExternalCompilationInfo(**eci_kwds)
-
platform.verify_eci(ExternalCompilationInfo(
compile_extra=['-DRPYTHON_LL2CTYPES'],
**eci_kwds))
+ eci = global_eci
vmprof_init = rffi.llexternal("vmprof_init",
[rffi.INT, rffi.DOUBLE, rffi.CCHARP],
rffi.CCHARP, compilation_info=eci)
@@ -55,7 +55,8 @@
rffi.INT, compilation_info=eci)
vmprof_ignore_signals = rffi.llexternal("vmprof_ignore_signals",
[rffi.INT], lltype.Void,
- compilation_info=eci)
+ compilation_info=eci,
+ _nowrapper=True)
return CInterface(locals())
@@ -67,112 +68,34 @@
def _freeze_(self):
return True
-def token2lltype(tok):
- if tok == 'i':
- return lltype.Signed
- if tok == 'r':
- return llmemory.GCREF
- raise NotImplementedError(repr(tok))
-def make_trampoline_function(name, func, token, restok):
- from rpython.jit.backend import detect_cpu
+# --- copy a few declarations from src/vmprof_stack.h ---
- cont_name = 'rpyvmprof_f_%s_%s' % (name, token)
- tramp_name = 'rpyvmprof_t_%s_%s' % (name, token)
- orig_tramp_name = tramp_name
+VMPROF_CODE_TAG = 1
- func.c_name = cont_name
- func._dont_inline_ = True
+VMPROFSTACK = lltype.ForwardReference()
+PVMPROFSTACK = lltype.Ptr(VMPROFSTACK)
+VMPROFSTACK.become(rffi.CStruct("vmprof_stack_s",
+ ('next', PVMPROFSTACK),
+ ('value', lltype.Signed),
+ ('kind', lltype.Signed)))
+# ----------
- if sys.platform == 'darwin':
- # according to internet "At the time UNIX was written in 1974...."
- # "... all C functions are prefixed with _"
- cont_name = '_' + cont_name
- tramp_name = '_' + tramp_name
- PLT = ""
- size_decl = ""
- type_decl = ""
- extra_align = ""
- else:
- PLT = "@PLT"
- type_decl = "\t.type\t%s, @function" % (tramp_name,)
- size_decl = "\t.size\t%s, .-%s" % (
- tramp_name, tramp_name)
- extra_align = "\t.cfi_def_cfa_offset 8"
- assert detect_cpu.autodetect().startswith(detect_cpu.MODEL_X86_64), (
- "rvmprof only supports x86-64 CPUs for now")
+vmprof_tl_stack = rthread.ThreadLocalField(PVMPROFSTACK, "vmprof_tl_stack")
+do_use_eci = rffi.llexternal_use_eci(
+ ExternalCompilationInfo(includes=['vmprof_stack.h'],
+ include_dirs = [SRC]))
- # mapping of argument count (not counting the final uid argument) to
- # the register that holds this uid argument
- reg = {0: '%rdi',
- 1: '%rsi',
- 2: '%rdx',
- 3: '%rcx',
- 4: '%r8',
- 5: '%r9',
- }
- try:
- reg = reg[len(token)]
- except KeyError:
- raise NotImplementedError(
- "not supported: %r takes more than 5 arguments" % (func,))
+def enter_code(unique_id):
+ do_use_eci()
+ s = lltype.malloc(VMPROFSTACK, flavor='raw')
+ s.c_next = vmprof_tl_stack.get_or_make_raw()
+ s.c_value = unique_id
+ s.c_kind = VMPROF_CODE_TAG
+ vmprof_tl_stack.setraw(s)
+ return s
- target = udir.join('module_cache')
- target.ensure(dir=1)
- target = target.join('trampoline_%s_%s.vmprof.s' % (name, token))
- # NOTE! the tabs in this file are absolutely essential, things
- # that don't start with \t are silently ignored (<arigato>: WAT!?)
- target.write("""\
-\t.text
-\t.globl\t%(tramp_name)s
-%(type_decl)s
-%(tramp_name)s:
-\t.cfi_startproc
-\tpushq\t%(reg)s
-\t.cfi_def_cfa_offset 16
-\tcall %(cont_name)s%(PLT)s
-\taddq\t$8, %%rsp
-%(extra_align)s
-\tret
-\t.cfi_endproc
-%(size_decl)s
-""" % locals())
-
- def tok2cname(tok):
- if tok == 'i':
- return 'long'
- if tok == 'r':
- return 'void *'
- raise NotImplementedError(repr(tok))
-
- header = 'RPY_EXTERN %s %s(%s);\n' % (
- tok2cname(restok),
- orig_tramp_name,
- ', '.join([tok2cname(tok) for tok in token] + ['long']))
-
- header += """\
-static int cmp_%s(void *addr) {
- if (addr == %s) return 1;
-#ifdef VMPROF_ADDR_OF_TRAMPOLINE
- return VMPROF_ADDR_OF_TRAMPOLINE(addr);
-#undef VMPROF_ADDR_OF_TRAMPOLINE
-#else
- return 0;
-#endif
-#define VMPROF_ADDR_OF_TRAMPOLINE cmp_%s
-}
-""" % (tramp_name, orig_tramp_name, tramp_name)
-
- eci = ExternalCompilationInfo(
- post_include_bits = [header],
- separate_module_files = [str(target)],
- )
-
- return rffi.llexternal(
- orig_tramp_name,
- [token2lltype(tok) for tok in token] + [lltype.Signed],
- token2lltype(restok),
- compilation_info=eci,
- _nowrapper=True, sandboxsafe=True,
- random_effects_on_gcobjs=True)
+def leave_code(s):
+ vmprof_tl_stack.setraw(s.c_next)
+ lltype.free(s, flavor='raw')
diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py
--- a/rpython/rlib/rvmprof/rvmprof.py
+++ b/rpython/rlib/rvmprof/rvmprof.py
@@ -4,12 +4,19 @@
from rpython.rlib.rvmprof import cintf
from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance
-from rpython.rtyper.lltypesystem import rffi
+from rpython.rtyper.lltypesystem import rffi, llmemory
+from rpython.rtyper.lltypesystem.lloperation import llop
MAX_FUNC_NAME = 1023
# ____________________________________________________________
+# keep in sync with vmprof_stack.h
+VMPROF_CODE_TAG = 1
+VMPROF_BLACKHOLE_TAG = 2
+VMPROF_JITTED_TAG = 3
+VMPROF_JITTING_TAG = 4
+VMPROF_GC_TAG = 5
class VMProfError(Exception):
def __init__(self, msg):
@@ -19,17 +26,16 @@
class VMProf(object):
+ _immutable_fields_ = ['is_enabled?']
+
def __init__(self):
"NOT_RPYTHON: use _get_vmprof()"
self._code_classes = set()
self._gather_all_code_objs = lambda: None
self._cleanup_()
- if sys.maxint == 2147483647:
- self._code_unique_id = 0 # XXX this is wrong, it won't work on 32bit
- else:
- self._code_unique_id = 0x7000000000000000
+ self._code_unique_id = 4
self.cintf = cintf.setup()
-
+
def _cleanup_(self):
self.is_enabled = False
@@ -127,7 +133,6 @@
if self.cintf.vmprof_register_virtual_function(name, uid, 500000) < 0:
raise VMProfError("vmprof buffers full! disk full or too slow")
-
def vmprof_execute_code(name, get_code_fn, result_class=None):
"""Decorator to be used on the function that interprets a code object.
@@ -136,12 +141,7 @@
'get_code_fn(*args)' is called to extract the code object from the
arguments given to the decorated function.
- The original function can return None, an integer, or an instance.
- In the latter case (only), 'result_class' must be set.
-
- NOTE: for now, this assumes that the decorated functions only takes
- instances or plain integer arguments, and at most 5 of them
- (including 'self' if applicable).
+ 'result_class' is ignored (backward compatibility).
"""
def decorate(func):
try:
@@ -149,52 +149,19 @@
except cintf.VMProfPlatformUnsupported:
return func
- if hasattr(func, 'im_self'):
- assert func.im_self is None
- func = func.im_func
-
- def lower(*args):
- if len(args) == 0:
- return (), ""
- ll_args, token = lower(*args[1:])
- ll_arg = args[0]
- if isinstance(ll_arg, int):
- tok = "i"
- else:
- tok = "r"
- ll_arg = cast_instance_to_gcref(ll_arg)
- return (ll_arg,) + ll_args, tok + token
-
- @specialize.memo()
- def get_ll_trampoline(token):
- if result_class is None:
- restok = "i"
- else:
- restok = "r"
- return cintf.make_trampoline_function(name, func, token, restok)
-
def decorated_function(*args):
- # go through the asm trampoline ONLY if we are translated but not
- # being JITted.
- #
- # If we are not translated, we obviously don't want to go through
- # the trampoline because there is no C function it can call.
- #
# If we are being JITted, we want to skip the trampoline, else the
# JIT cannot see through it.
- #
- if we_are_translated() and not jit.we_are_jitted():
- # if we are translated, call the trampoline
+ if not jit.we_are_jitted():
unique_id = get_code_fn(*args)._vmprof_unique_id
- ll_args, token = lower(*args)
- ll_trampoline = get_ll_trampoline(token)
- ll_result = ll_trampoline(*ll_args + (unique_id,))
- if result_class is not None:
- return cast_base_ptr_to_instance(result_class, ll_result)
- else:
- return ll_result
+ x = cintf.enter_code(unique_id)
+ try:
+ return func(*args)
+ finally:
+ cintf.leave_code(x)
else:
return func(*args)
+
decorated_function.__name__ = func.__name__ + '_rvmprof'
return decorated_function
diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c
--- a/rpython/rlib/rvmprof/src/rvmprof.c
+++ b/rpython/rlib/rvmprof/src/rvmprof.c
@@ -12,10 +12,12 @@
#else
# include "common_header.h"
+# include "structdef.h"
+# include "src/threadlocal.h"
# include "rvmprof.h"
-# ifndef VMPROF_ADDR_OF_TRAMPOLINE
+/*# ifndef VMPROF_ADDR_OF_TRAMPOLINE
# error "RPython program using rvmprof, but not calling vmprof_execute_code()"
-# endif
+# endif*/
#endif
diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h
--- a/rpython/rlib/rvmprof/src/rvmprof.h
+++ b/rpython/rlib/rvmprof/src/rvmprof.h
@@ -4,3 +4,7 @@
RPY_EXTERN int vmprof_enable(void);
RPY_EXTERN int vmprof_disable(void);
RPY_EXTERN int vmprof_register_virtual_function(char *, long, int);
+RPY_EXTERN void* vmprof_stack_new(void);
+RPY_EXTERN int vmprof_stack_append(void*, long);
+RPY_EXTERN long vmprof_stack_pop(void*);
+RPY_EXTERN void vmprof_stack_free(void*);
diff --git a/rpython/rlib/rvmprof/src/vmprof_common.h b/rpython/rlib/rvmprof/src/vmprof_common.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/vmprof_common.h
@@ -0,0 +1,72 @@
+#include <stddef.h>
+
+#define MAX_FUNC_NAME 1024
+
+static int profile_file = -1;
+static long prepare_interval_usec = 0;
+static long profile_interval_usec = 0;
+static int opened_profile(char *interp_name);
+
+#define MAX_STACK_DEPTH \
+ ((SINGLE_BUF_SIZE - sizeof(struct prof_stacktrace_s)) / sizeof(void *))
+
+#define MARKER_STACKTRACE '\x01'
+#define MARKER_VIRTUAL_IP '\x02'
+#define MARKER_TRAILER '\x03'
+#define MARKER_INTERP_NAME '\x04' /* deprecated */
+#define MARKER_HEADER '\x05'
+
+#define VERSION_BASE '\x00'
+#define VERSION_THREAD_ID '\x01'
+#define VERSION_TAG '\x02'
+
+typedef struct prof_stacktrace_s {
+ char padding[sizeof(long) - 1];
+ char marker;
+ long count, depth;
+ void *stack[];
+} prof_stacktrace_s;
+
+
+RPY_EXTERN
+char *vmprof_init(int fd, double interval, char *interp_name)
+{
+ if (interval < 1e-6 || interval >= 1.0)
+ return "bad value for 'interval'";
+ prepare_interval_usec = (int)(interval * 1000000.0);
+
+ if (prepare_concurrent_bufs() < 0)
+ return "out of memory";
+
+ assert(fd >= 0);
+ profile_file = fd;
+ if (opened_profile(interp_name) < 0) {
+ profile_file = -1;
+ return strerror(errno);
+ }
+ return NULL;
+}
+
+static int _write_all(const char *buf, size_t bufsize);
+
+static int opened_profile(char *interp_name)
+{
+ struct {
+ long hdr[5];
+ char interp_name[259];
+ } header;
+
+ size_t namelen = strnlen(interp_name, 255);
+
+ header.hdr[0] = 0;
+ header.hdr[1] = 3;
+ header.hdr[2] = 0;
+ header.hdr[3] = prepare_interval_usec;
+ header.hdr[4] = 0;
+ header.interp_name[0] = MARKER_HEADER;
+ header.interp_name[1] = '\x00';
+ header.interp_name[2] = VERSION_TAG;
+ header.interp_name[3] = namelen;
+ memcpy(&header.interp_name[4], interp_name, namelen);
+ return _write_all((char*)&header, 5 * sizeof(long) + 4 + namelen);
+}
diff --git a/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h b/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h
--- a/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h
+++ b/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h
@@ -1,119 +1,49 @@
-#ifdef PYPY_JIT_CODEMAP
void *pypy_find_codemap_at_addr(long addr, long *start_addr);
long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
long *current_pos_addr);
long pypy_jit_stack_depth_at_loc(long loc);
-#endif
-#ifdef CPYTHON_GET_CUSTOM_OFFSET
-static void *tramp_start, *tramp_end;
-#endif
-
-
-static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, void *cp) {
-
-#if defined(PYPY_JIT_CODEMAP)
-
- intptr_t ip_l = (intptr_t)ip;
- return pypy_jit_stack_depth_at_loc(ip_l);
-
-#elif defined(CPYTHON_GET_CUSTOM_OFFSET)
-
- if (ip >= tramp_start && ip <= tramp_end) {
- // XXX the return value is wrong for all the places before push and
- // after pop, fix
- void *bp;
- void *sp;
-
- /* This is a stage2 trampoline created by hotpatch:
-
- push %rbx
- push %rbp
- mov %rsp,%rbp
- and $0xfffffffffffffff0,%rsp // make sure the stack is aligned
- movabs $0x7ffff687bb10,%rbx
- callq *%rbx
- leaveq
- pop %rbx
- retq
-
- the stack layout is like this:
-
- +-----------+ high addresses
- | ret addr |
- +-----------+
- | saved rbx | start of the function frame
- +-----------+
- | saved rbp |
- +-----------+
- | ........ | <-- rbp
- +-----------+ low addresses
-
- So, the trampoline frame starts at rbp+16, and the return address,
- is at rbp+24. The vmprof API requires us to return the offset of
- the frame relative to sp, hence we have this weird computation.
-
- XXX (antocuni): I think we could change the API to return directly
- the frame address instead of the offset; however, this require a
- change in the PyPy code too
- */
-
- unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp);
- unw_get_reg (cp, UNW_X86_64_RBP, (unw_word_t*)&bp);
- return bp+16+8-sp;
- }
- return -1;
-
-#else
-
- return -1;
-
-#endif
-}
-
-static long vmprof_write_header_for_jit_addr(void **result, long n,
- void *ip, int max_depth)
+static long vmprof_write_header_for_jit_addr(intptr_t *result, long n,
+ intptr_t ip, int max_depth)
{
#ifdef PYPY_JIT_CODEMAP
void *codemap;
long current_pos = 0;
- intptr_t id;
+ intptr_t ident;
long start_addr = 0;
intptr_t addr = (intptr_t)ip;
int start, k;
- void *tmp;
+ intptr_t tmp;
codemap = pypy_find_codemap_at_addr(addr, &start_addr);
- if (codemap == NULL)
- // not a jit code at all
+ if (codemap == NULL || n >= max_depth - 2)
+ // not a jit code at all or almost max depth
return n;
// modify the last entry to point to start address and not the random one
// in the middle
- result[n - 1] = (void*)start_addr;
- result[n] = (void*)2;
- n++;
+ result[n++] = VMPROF_ASSEMBLER_TAG;
+ result[n++] = start_addr;
start = n;
while (n < max_depth) {
- id = pypy_yield_codemap_at_addr(codemap, addr, &current_pos);
- if (id == -1)
+ ident = pypy_yield_codemap_at_addr(codemap, addr, &current_pos);
+ if (ident == -1)
// finish
break;
- if (id == 0)
+ if (ident == 0)
continue; // not main codemap
- result[n++] = (void *)id;
+ result[n++] = VMPROF_JITTED_TAG;
+ result[n++] = ident;
}
- k = 0;
+ k = 1;
+
while (k < (n - start) / 2) {
tmp = result[start + k];
- result[start + k] = result[n - k - 1];
- result[n - k - 1] = tmp;
- k++;
- }
- if (n < max_depth) {
- result[n++] = (void*)3;
+ result[start + k] = result[n - k];
+ result[n - k] = tmp;
+ k += 2;
}
#endif
return n;
diff --git a/rpython/rlib/rvmprof/src/vmprof_getpc.h b/rpython/rlib/rvmprof/src/vmprof_getpc.h
--- a/rpython/rlib/rvmprof/src/vmprof_getpc.h
+++ b/rpython/rlib/rvmprof/src/vmprof_getpc.h
@@ -134,7 +134,7 @@
}
};
-void* GetPC(ucontext_t *signal_ucontext) {
+intptr_t GetPC(ucontext_t *signal_ucontext) {
// See comment above struct CallUnrollInfo. Only try instruction
// flow matching if both eip and esp looks reasonable.
const int eip = signal_ucontext->uc_mcontext.gregs[REG_EIP];
@@ -146,12 +146,12 @@
if (!memcmp(eip_char + callunrollinfo[i].pc_offset,
callunrollinfo[i].ins, callunrollinfo[i].ins_size)) {
// We have a match.
- void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset);
+ intptr_t *retaddr = (intptr_t*)(esp + callunrollinfo[i].return_sp_offset);
return *retaddr;
}
}
}
- return (void*)eip;
+ return eip;
}
// Special case #2: Windows, which has to do something totally different.
@@ -170,7 +170,7 @@
typedef int ucontext_t;
#endif
-void* GetPC(ucontext_t *signal_ucontext) {
+intptr_t GetPC(ucontext_t *signal_ucontext) {
RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n");
return NULL;
}
@@ -180,11 +180,11 @@
// the right value for your system, and add it to the list in
// configure.ac (or set it manually in your config.h).
#else
-void* GetPC(ucontext_t *signal_ucontext) {
+intptr_t GetPC(ucontext_t *signal_ucontext) {
#ifdef __APPLE__
- return (void*)(signal_ucontext->uc_mcontext->__ss.__rip);
+ return (signal_ucontext->uc_mcontext->__ss.__rip);
#else
- return (void*)signal_ucontext->PC_FROM_UCONTEXT; // defined in config.h
+ return signal_ucontext->PC_FROM_UCONTEXT; // defined in config.h
#endif
}
diff --git a/rpython/rlib/rvmprof/src/vmprof_main.h b/rpython/rlib/rvmprof/src/vmprof_main.h
--- a/rpython/rlib/rvmprof/src/vmprof_main.h
+++ b/rpython/rlib/rvmprof/src/vmprof_main.h
@@ -25,84 +25,28 @@
#include <sys/time.h>
#include <errno.h>
#include <unistd.h>
+#include <stddef.h>
#include <stdio.h>
#include <sys/types.h>
#include <signal.h>
#include <sys/stat.h>
+#include <unistd.h>
#include <fcntl.h>
#include "vmprof_getpc.h"
-#ifdef __APPLE__
-#include "libunwind.h"
-#else
-#include "vmprof_unwind.h"
-#endif
#include "vmprof_mt.h"
-
+#include "vmprof_stack.h"
+#include "vmprof_common.h"
/************************************************************/
-// functions copied from libunwind using dlopen
-
-#ifndef __APPLE__ // should be linux only probably
-static int (*unw_get_reg)(unw_cursor_t*, int, unw_word_t*) = NULL;
-static int (*unw_step)(unw_cursor_t*) = NULL;
-static int (*unw_init_local)(unw_cursor_t *, unw_context_t *) = NULL;
-static int (*unw_get_proc_info)(unw_cursor_t *, unw_proc_info_t *) = NULL;
-#endif
-
-static int profile_file = -1;
static long prepare_interval_usec;
+static long saved_profile_file;
static struct profbuf_s *volatile current_codes;
static void *(*mainloop_get_virtual_ip)(char *) = 0;
static int opened_profile(char *interp_name);
static void flush_codes(void);
-#ifdef __APPLE__
-#define UNWIND_NAME "/usr/lib/system/libunwind.dylib"
-#define UNW_PREFIX "unw"
-#else
-#define UNWIND_NAME "libunwind.so"
-#define UNW_PREFIX "_ULx86_64"
-#endif
-
-RPY_EXTERN
-char *vmprof_init(int fd, double interval, char *interp_name)
-{
- if (interval < 1e-6 || interval >= 1.0)
- return "bad value for 'interval'";
- prepare_interval_usec = (int)(interval * 1000000.0);
-
-#ifndef __APPLE__
- if (!unw_get_reg) {
- void *libhandle;
-
- if (!(libhandle = dlopen(UNWIND_NAME, RTLD_LAZY | RTLD_LOCAL)))
- goto error;
- if (!(unw_get_reg = dlsym(libhandle, UNW_PREFIX "_get_reg")))
- goto error;
- if (!(unw_get_proc_info = dlsym(libhandle, UNW_PREFIX
"_get_proc_info")))
- goto error;
- if (!(unw_init_local = dlsym(libhandle, UNW_PREFIX "_init_local")))
- goto error;
- if (!(unw_step = dlsym(libhandle, UNW_PREFIX "_step")))
- goto error;
- }
-#endif
- if (prepare_concurrent_bufs() < 0)
- return "out of memory";
-
- assert(fd >= 0);
- profile_file = fd;
- if (opened_profile(interp_name) < 0) {
- profile_file = -1;
- return strerror(errno);
- }
- return NULL;
-
- error:
- return dlerror();
-}
/************************************************************/
@@ -131,131 +75,62 @@
* *************************************************************
*/
-#define MAX_FUNC_NAME 128
-#define MAX_STACK_DEPTH \
- ((SINGLE_BUF_SIZE - sizeof(struct prof_stacktrace_s)) / sizeof(void *))
-
-#define MARKER_STACKTRACE '\x01'
-#define MARKER_VIRTUAL_IP '\x02'
-#define MARKER_TRAILER '\x03'
-#define MARKER_INTERP_NAME '\x04' /* deprecated */
-#define MARKER_HEADER '\x05'
-
-#define VERSION_BASE '\x00'
-#define VERSION_THREAD_ID '\x01'
-
-struct prof_stacktrace_s {
- char padding[sizeof(long) - 1];
- char marker;
- long count, depth;
- void *stack[];
-};
-
-static long profile_interval_usec = 0;
static char atfork_hook_installed = 0;
-/* ******************************************************
- * libunwind workaround for process JIT frames correctly
- * ******************************************************
- */
-
#include "vmprof_get_custom_offset.h"
-typedef struct {
- void* _unused1;
- void* _unused2;
- void* sp;
- void* ip;
- void* _unused3[sizeof(unw_cursor_t)/sizeof(void*) - 4];
-} vmprof_hacked_unw_cursor_t;
-
-static int vmprof_unw_step(unw_cursor_t *cp, int first_run)
-{
- void* ip;
- void* sp;
- ptrdiff_t sp_offset;
- unw_get_reg (cp, UNW_REG_IP, (unw_word_t*)&ip);
- unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp);
- if (!first_run) {
- // make sure we're pointing to the CALL and not to the first
- // instruction after. If the callee adjusts the stack for us
- // it's not safe to be at the instruction after
- ip -= 1;
- }
- sp_offset = vmprof_unw_get_custom_offset(ip, cp);
-
- if (sp_offset == -1) {
- // it means that the ip is NOT in JITted code, so we can use the
- // stardard unw_step
- return unw_step(cp);
- }
- else {
- // this is a horrible hack to manually walk the stack frame, by
- // setting the IP and SP in the cursor
- vmprof_hacked_unw_cursor_t *cp2 = (vmprof_hacked_unw_cursor_t*)cp;
- void* bp = (void*)sp + sp_offset;
- cp2->sp = bp;
- bp -= sizeof(void*);
- cp2->ip = ((void**)bp)[0];
- // the ret is on the top of the stack minus WORD
- return 1;
- }
-}
-
-
/* *************************************************************
* functions to dump the stack trace
* *************************************************************
*/
-static int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext)
+
+#ifndef RPYTHON_LL2CTYPES
+static vmprof_stack_t *get_vmprof_stack(void)
{
- void *ip;
- int n = 0;
- unw_cursor_t cursor;
-#ifdef __APPLE__
- unw_context_t uc;
- unw_getcontext(&uc);
+ return RPY_THREADLOCALREF_GET(vmprof_tl_stack);
+}
#else
- unw_context_t uc = *ucontext;
+static vmprof_stack_t *get_vmprof_stack(void)
+{
+ return 0;
+}
#endif
- int ret = unw_init_local(&cursor, &uc);
- assert(ret >= 0);
- (void)ret;
-
- while (n < max_depth) {
- if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
- break;
+static int get_stack_trace(intptr_t *result, int max_depth, intptr_t pc,
ucontext_t *ucontext)
+{
+ vmprof_stack_t* stack = get_vmprof_stack();
+ int n = 0;
+ intptr_t addr = 0;
+ int bottom_jitted = 0;
+ // check if the pc is in JIT
+#ifdef PYPY_JIT_CODEMAP
+ if (pypy_find_codemap_at_addr((intptr_t)pc, &addr)) {
+ // the bottom part is jitted, which means we can fill up the first part
+ // from the JIT
+ n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth);
+ stack = stack->next; // skip the first item as it contains garbage
+ }
+#endif
+ while (n < max_depth - 1 && stack) {
+ if (stack->kind == VMPROF_CODE_TAG) {
+ result[n] = stack->kind;
+ result[n + 1] = stack->value;
+ n += 2;
}
-
- unw_proc_info_t pip;
- unw_get_proc_info(&cursor, &pip);
-
- /* if n==0, it means that the signal handler interrupted us while we
- were in the trampoline, so we are not executing (yet) the real main
- loop function; just skip it */
- if (VMPROF_ADDR_OF_TRAMPOLINE((void*)pip.start_ip) && n > 0) {
- // found main loop stack frame
- void* sp;
- unw_get_reg(&cursor, UNW_REG_SP, (unw_word_t *) &sp);
- if (mainloop_get_virtual_ip)
- ip = mainloop_get_virtual_ip((char *)sp);
- else
- ip = *(void **)sp;
+#ifdef PYPY_JIT_CODEMAP
+ else if (stack->kind == VMPROF_JITTED_TAG) {
+ pc = ((intptr_t*)(stack->value - sizeof(intptr_t)))[0];
+ n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth);
}
-
- int first_run = (n == 0);
- result[n++] = ip;
- n = vmprof_write_header_for_jit_addr(result, n, ip, max_depth);
- if (vmprof_unw_step(&cursor, first_run) <= 0)
- break;
+#endif
+ stack = stack->next;
}
return n;
}
-static void *get_current_thread_id(void)
+static intptr_t get_current_thread_id(void)
{
/* xxx This function is a hack on two fronts:
@@ -269,7 +144,7 @@
An alternative would be to try to look if the information is
available in the ucontext_t in the caller.
*/
- return (void *)pthread_self();
+ return (intptr_t)pthread_self();
}
@@ -278,8 +153,43 @@
* *************************************************************
*/
+#include <setjmp.h>
+
+volatile int spinlock;
+jmp_buf restore_point;
+
+static void segfault_handler(int arg)
+{
+ longjmp(restore_point, SIGSEGV);
+}
+
static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext)
{
+#ifdef __APPLE__
+ // TERRIBLE HACK AHEAD
+ // on OS X, the thread local storage is sometimes uninitialized
+ // when the signal handler runs - it means it's impossible to read errno
+ // or call any syscall or read PyThread_Current or pthread_self. Additionally,
+ // it seems impossible to read the register gs.
+ // Here we register a segfault handler (all guarded by a spinlock) and call
+ // longjmp in case a segfault happens while reading a thread local.
+ while (__sync_lock_test_and_set(&spinlock, 1)) {
+ }
+ signal(SIGSEGV, &segfault_handler);
+ int fault_code = setjmp(restore_point);
+ if (fault_code == 0) {
+ pthread_self();
+ get_current_thread_id();
+ } else {
+ signal(SIGSEGV, SIG_DFL);
+ __sync_synchronize();
+ spinlock = 0;
+ return;
+ }
+ signal(SIGSEGV, SIG_DFL);
+ __sync_synchronize();
+ spinlock = 0;
+#endif
long val = __sync_fetch_and_add(&signal_handler_value, 2L);
if ((val & 1) == 0) {
@@ -296,9 +206,8 @@
struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data;
st->marker = MARKER_STACKTRACE;
st->count = 1;
- st->stack[0] = GetPC((ucontext_t*)ucontext);
- depth = get_stack_trace(st->stack+1, MAX_STACK_DEPTH-2, ucontext);
- depth++; // To account for pc value in stack[0];
+ depth = get_stack_trace(st->stack,
+ MAX_STACK_DEPTH-2, GetPC((ucontext_t*)ucontext), ucontext);
st->depth = depth;
st->stack[depth++] = get_current_thread_id();
p->data_offset = offsetof(struct prof_stacktrace_s, marker);
@@ -363,12 +272,15 @@
static void atfork_disable_timer(void) {
if (profile_interval_usec > 0) {
+ saved_profile_file = profile_file;
+ profile_file = -1;
remove_sigprof_timer();
}
}
static void atfork_enable_timer(void) {
if (profile_interval_usec > 0) {
+ profile_file = saved_profile_file;
install_sigprof_timer();
}
}
@@ -415,7 +327,7 @@
return -1;
}
-static int _write_all(const void *buf, size_t bufsize)
+static int _write_all(const char *buf, size_t bufsize)
{
while (bufsize > 0) {
ssize_t count = write(profile_file, buf, bufsize);
@@ -427,71 +339,13 @@
return 0;
}
-static int opened_profile(char *interp_name)
-{
- struct {
- long hdr[5];
- char interp_name[259];
- } header;
-
- size_t namelen = strnlen(interp_name, 255);
- current_codes = NULL;
-
- header.hdr[0] = 0;
- header.hdr[1] = 3;
- header.hdr[2] = 0;
- header.hdr[3] = prepare_interval_usec;
- header.hdr[4] = 0;
- header.interp_name[0] = MARKER_HEADER;
- header.interp_name[1] = '\x00';
- header.interp_name[2] = VERSION_THREAD_ID;
- header.interp_name[3] = namelen;
- memcpy(&header.interp_name[4], interp_name, namelen);
- return _write_all(&header, 5 * sizeof(long) + 4 + namelen);
-}
-
static int close_profile(void)
{
- char buf[4096];
- ssize_t size;
unsigned char marker = MARKER_TRAILER;
if (_write_all(&marker, 1) < 0)
return -1;
-#ifdef __linux__
- // copy /proc/self/maps to the end of the profile file
- int srcfd = open("/proc/self/maps", O_RDONLY);
- if (srcfd < 0)
- return -1;
-
- while ((size = read(srcfd, buf, sizeof buf)) > 0) {
- if (_write_all(buf, size) < 0) {
- close(srcfd);
- return -1;
- }
- }
- close(srcfd);
-#else
- // freebsd and mac
-#if defined(__APPLE__)
- sprintf(buf, "vmmap %d", getpid());
-#else
- sprintf(buf, "procstat -v %d", getpid());
-#endif
- FILE *srcf = popen(buf, "r");
- if (!srcf)
- return -1;
-
- while ((size = fread(buf, 1, sizeof buf, srcf))) {
- if (_write_all(buf, size) < 0) {
- pclose(srcf);
- return -1;
- }
- }
- pclose(srcf);
-#endif
-
/* don't close() the file descriptor from here */
profile_file = -1;
return 0;
@@ -522,6 +376,9 @@
struct profbuf_s *p;
char *t;
+ if (profile_file == -1)
+ return 0; // silently don't write it
+
retry:
p = current_codes;
if (p != NULL) {
@@ -529,7 +386,7 @@
/* grabbed 'current_codes': we will append the current block
to it if it contains enough room */
size_t freesize = SINGLE_BUF_SIZE - p->data_size;
- if (freesize < blocklen) {
+ if (freesize < (size_t)blocklen) {
/* full: flush it */
commit_buffer(profile_file, p);
p = NULL;
diff --git a/rpython/rlib/rvmprof/src/vmprof_stack.h
b/rpython/rlib/rvmprof/src/vmprof_stack.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/vmprof_stack.h
@@ -0,0 +1,25 @@
+#ifndef _VMPROF_STACK_H_
+#define _VMPROF_STACK_H_
+
+#include <unistd.h>
+
+#define VMPROF_CODE_TAG 1 /* <- also in cintf.py */
+#define VMPROF_BLACKHOLE_TAG 2
+#define VMPROF_JITTED_TAG 3
+#define VMPROF_JITTING_TAG 4
+#define VMPROF_GC_TAG 5
+#define VMPROF_ASSEMBLER_TAG 6
+// whatever we want here
+
+typedef struct vmprof_stack_s {
+ struct vmprof_stack_s* next;
+ intptr_t value;
+ intptr_t kind;
+} vmprof_stack_t;
+
+// the kind is WORD so we consume exactly 3 WORDs and we don't have
+// to worry too much. There is a potential for squeezing it with bit
+// patterns into one WORD, but I don't want to care RIGHT NOW; this is left
+// as a potential future optimization
+
+#endif
diff --git a/rpython/rlib/rvmprof/test/test_ztranslation.py
b/rpython/rlib/rvmprof/test/test_ztranslation.py
--- a/rpython/rlib/rvmprof/test/test_ztranslation.py
+++ b/rpython/rlib/rvmprof/test/test_ztranslation.py
@@ -64,8 +64,14 @@
def test_interpreted():
# takes forever if the Python process is already big...
import subprocess
- subprocess.check_call([sys.executable, os.path.basename(__file__)],
- cwd=(os.path.dirname(__file__) or '.'))
+ me = os.path.basename(__file__)
+ if me.endswith('pyc') or me.endswith('pyo'):
+ me = me[:-1]
+ env = os.environ.copy()
+ env['PYTHONPATH'] = ''
+ subprocess.check_call([sys.executable, me],
+ cwd=(os.path.dirname(__file__) or '.'),
+ env=env)
def test_compiled():
fn = compile(main, [], gcpolicy="minimark")
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit