Author: Richard Plangger <[email protected]>
Branch: vmprof-native
Changeset: r89998:dc4c3b35b94c
Date: 2017-02-07 14:29 +0100
http://bitbucket.org/pypy/pypy/changeset/dc4c3b35b94c/
Log: passing some more vmprof tests
diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py
--- a/rpython/rlib/rvmprof/cintf.py
+++ b/rpython/rlib/rvmprof/cintf.py
@@ -18,7 +18,7 @@
UDIS86 = SHARED.join('libudis86')
BACKTRACE = SHARED.join('libbacktrace')
-compile_extra = ['-DRPYTHON_LL2CTYPES','-DRPYTHON_VMPROF']
+compile_extra = ['-DRPYTHON_VMPROF', '-g', '-O1']
if sys.platform.startswith('linux'):
separate_module_files = [
BACKTRACE.join('backtrace.c'),
@@ -66,14 +66,17 @@
def setup():
- platform.verify_eci(ExternalCompilationInfo(**eci_kwds))
+ eci_kwds['compile_extra'].append('-DRPYTHON_LL2CTYPES')
+ platform.verify_eci(ExternalCompilationInfo(
+ **eci_kwds))
eci = global_eci
vmprof_init = rffi.llexternal("vmprof_init",
[rffi.INT, rffi.DOUBLE, rffi.INT, rffi.INT,
rffi.CCHARP, rffi.INT],
rffi.CCHARP, compilation_info=eci)
- vmprof_enable = rffi.llexternal("vmprof_enable", [rffi.INT], rffi.INT,
+ vmprof_enable = rffi.llexternal("vmprof_enable", [rffi.INT, rffi.INT],
+ rffi.INT,
compilation_info=eci,
save_err=rffi.RFFI_SAVE_ERRNO)
vmprof_disable = rffi.llexternal("vmprof_disable", [], rffi.INT,
diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py
--- a/rpython/rlib/rvmprof/rvmprof.py
+++ b/rpython/rlib/rvmprof/rvmprof.py
@@ -10,6 +10,8 @@
MAX_FUNC_NAME = 1023
+PLAT_WINDOWS = sys.platform == 'win32'
+
# ____________________________________________________________
# keep in sync with vmprof_stack.h
@@ -132,6 +134,8 @@
if self.is_enabled:
raise VMProfError("vmprof is already enabled")
+ if PLAT_WINDOWS:
+ native = 0 # force disabled on Windows
lines = 0 # not supported on PyPy currently
p_error = self.cintf.vmprof_init(fileno, interval, lines, memory,
"pypy", native)
@@ -139,7 +143,7 @@
raise VMProfError(rffi.charp2str(p_error))
self._gather_all_code_objs()
- res = self.cintf.vmprof_enable(memory)
+ res = self.cintf.vmprof_enable(memory, native)
if res < 0:
raise VMProfError(os.strerror(rposix.get_saved_errno()))
self.is_enabled = True
diff --git a/rpython/rlib/rvmprof/src/rvmprof.c
b/rpython/rlib/rvmprof/src/rvmprof.c
--- a/rpython/rlib/rvmprof/src/rvmprof.c
+++ b/rpython/rlib/rvmprof/src/rvmprof.c
@@ -3,8 +3,6 @@
#ifdef RPYTHON_LL2CTYPES
/* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */
-static volatile int is_enabled = 0;
-
#else
# include "common_header.h"
# include "structdef.h"
@@ -47,3 +45,17 @@
return NULL; // TODO _default_eval_loop(f, throwflag);
}
#endif
+
+void dump_native_symbols(int fileno)
+{
+// TODO PyObject * mod = NULL;
+// TODO
+// TODO mod = PyImport_ImportModuleNoBlock("vmprof");
+// TODO if (mod == NULL)
+// TODO goto error;
+// TODO
+// TODO PyObject_CallMethod(mod, "dump_native_symbols", "(l)", fileno);
+// TODO
+// TODOerror:
+// TODO Py_XDECREF(mod);
+}
diff --git a/rpython/rlib/rvmprof/src/rvmprof.h
b/rpython/rlib/rvmprof/src/rvmprof.h
--- a/rpython/rlib/rvmprof/src/rvmprof.h
+++ b/rpython/rlib/rvmprof/src/rvmprof.h
@@ -25,7 +25,7 @@
RPY_EXTERN char *vmprof_init(int fd, double interval, int memory,
int lines, const char *interp_name, int native);
RPY_EXTERN void vmprof_ignore_signals(int);
-RPY_EXTERN int vmprof_enable(int memory);
+RPY_EXTERN int vmprof_enable(int memory, int native);
RPY_EXTERN int vmprof_disable(void);
RPY_EXTERN int vmprof_register_virtual_function(char *, long, int);
RPY_EXTERN void* vmprof_stack_new(void);
diff --git a/rpython/rlib/rvmprof/src/shared/_vmprof.c
b/rpython/rlib/rvmprof/src/shared/_vmprof.c
--- a/rpython/rlib/rvmprof/src/shared/_vmprof.c
+++ b/rpython/rlib/rvmprof/src/shared/_vmprof.c
@@ -12,6 +12,8 @@
#include "_vmprof.h"
static volatile int is_enabled = 0;
+static destructor Original_code_dealloc = 0;
+static PyObject* (*_default_eval_loop)(PyFrameObject *, int) = 0;
#if VMPROF_UNIX
#include "trampoline.h"
@@ -23,9 +25,6 @@
#endif
#include "vmp_stack.h"
-static destructor Original_code_dealloc = 0;
-PyObject* (*_default_eval_loop)(PyFrameObject *, int) = 0;
-
#ifdef VMPROF_UNIX
#ifdef __clang__
__attribute__((disable_tail_calls))
@@ -150,30 +149,6 @@
Original_code_dealloc(co);
}
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
-static void init_cpyprof(int native)
-{
- // skip this if native should not be enabled
- if (!native) {
- vmp_native_disable();
- return;
- }
-#if CPYTHON_HAS_FRAME_EVALUATION
- PyThreadState *tstate = PyThreadState_GET();
- tstate->interp->eval_frame = vmprof_eval;
- _default_eval_loop = _PyEval_EvalFrameDefault;
-#else
- if (vmp_patch_callee_trampoline(PyEval_EvalFrameEx,
- vmprof_eval, (void*)&_default_eval_loop) == 0) {
- } else {
- fprintf(stderr, "FATAL: could not insert trampline, try with
--no-native\n");
- // TODO dump the first few bytes and tell them to create an issue!
- exit(-1);
- }
-#endif
- vmp_native_enable();
-}
-#endif
void dump_native_symbols(int fileno)
{
@@ -189,22 +164,6 @@
Py_XDECREF(mod);
}
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
-static void disable_cpyprof(void)
-{
- vmp_native_disable();
-#if CPYTHON_HAS_FRAME_EVALUATION
- PyThreadState *tstate = PyThreadState_GET();
- tstate->interp->eval_frame = _PyEval_EvalFrameDefault;
-#else
- if (vmp_unpatch_callee_trampoline(PyEval_EvalFrameEx) > 0) {
- fprintf(stderr, "FATAL: could not remove trampoline\n");
- exit(-1);
- }
-#endif
- dump_native_symbols(vmp_profile_fileno());
-}
-#endif
static PyObject *enable_vmprof(PyObject* self, PyObject *args)
@@ -229,10 +188,6 @@
vmp_profile_lines(lines);
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
- init_cpyprof(native);
-#endif
-
if (!Original_code_dealloc) {
Original_code_dealloc = PyCode_Type.tp_dealloc;
PyCode_Type.tp_dealloc = &cpyprof_code_dealloc;
@@ -244,7 +199,7 @@
return NULL;
}
- if (vmprof_enable(memory) < 0) {
+ if (vmprof_enable(memory, native) < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
@@ -265,9 +220,6 @@
is_enabled = 0;
vmprof_ignore_signals(1);
emit_all_code_objects();
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
- disable_cpyprof();
-#endif
if (vmprof_disable() < 0) {
PyErr_SetFromErrno(PyExc_OSError);
@@ -319,7 +271,7 @@
vmprof_ignore_signals(0);
return NULL;
}
- entry_count = vmp_walk_and_record_stack(tstate->frame, m,
MAX_STACK_DEPTH-1, 0);
+ entry_count = vmp_walk_and_record_stack(tstate->frame, m,
MAX_STACK_DEPTH-1, 0, 0);
for (i = 0; i < entry_count; i++) {
routine_ip = m[i];
diff --git a/rpython/rlib/rvmprof/src/shared/_vmprof.h
b/rpython/rlib/rvmprof/src/shared/_vmprof.h
--- a/rpython/rlib/rvmprof/src/shared/_vmprof.h
+++ b/rpython/rlib/rvmprof/src/shared/_vmprof.h
@@ -8,6 +8,7 @@
#else
#include <inttypes.h>
#include <stdint.h>
+#include <stddef.h>
#endif
/**
diff --git a/rpython/rlib/rvmprof/src/shared/symboltable.c
b/rpython/rlib/rvmprof/src/shared/symboltable.c
--- a/rpython/rlib/rvmprof/src/shared/symboltable.c
+++ b/rpython/rlib/rvmprof/src/shared/symboltable.c
@@ -9,19 +9,7 @@
#include <dlfcn.h>
#ifdef VMPROF_LINUX
-#define _GNU_SOURCE 1
#include <link.h>
-// copied from man page...
-typedef struct {
- const char *dli_fname; /* Pathname of shared object that
- contains address */
- void *dli_fbase; /* Base address at which shared
- object is loaded */
- const char *dli_sname; /* Name of symbol whose definition
- overlaps addr */
- void *dli_saddr; /* Exact address of symbol named
- in dli_sname */
-} Dl_info;
#endif
#ifdef _PY_TEST
@@ -232,7 +220,7 @@
name[name_len-1] = 0;
}
lookup_vmprof_debug_info(name, info.dli_fbase, srcfile, srcfile_len,
lineno);
-#elif defined(VMPROF_LINUX)
+#elif defined(__unix__)
if (bstate == NULL) {
bstate = backtrace_create_state (NULL, 1, backtrace_error_cb, NULL);
}
diff --git a/rpython/rlib/rvmprof/src/shared/symboltable.h
b/rpython/rlib/rvmprof/src/shared/symboltable.h
--- a/rpython/rlib/rvmprof/src/shared/symboltable.h
+++ b/rpython/rlib/rvmprof/src/shared/symboltable.h
@@ -1,5 +1,7 @@
#pragma once
+#define _GNU_SOURCE 1
+
/**
* Extract all the known symbols from the current process and
* log them to the file descriptor. To read them see binary.py funcs:
diff --git a/rpython/rlib/rvmprof/src/shared/trampoline.c
b/rpython/rlib/rvmprof/src/shared/trampoline.c
--- a/rpython/rlib/rvmprof/src/shared/trampoline.c
+++ b/rpython/rlib/rvmprof/src/shared/trampoline.c
@@ -1,7 +1,7 @@
#include "trampoline.h"
+#include "vmprof.h"
#include "machine.h"
-#include "_vmprof.h"
#define _GNU_SOURCE 1
#include <string.h>
@@ -141,6 +141,9 @@
while (bytes < needed_bytes) {
unsigned int res = vmp_machine_code_instr_length(ptr);
if (res == 0) {
+ fprintf(stderr, "could not determine length of instr for
trampoline\n");
+ fprintf(stderr, " %x %x %x %x %x\n", ptr[0], ptr[1],
+ ptr[2], ptr[3], ptr[4]);
return 1;
}
#ifdef X86_32
@@ -180,6 +183,7 @@
int pagesize;
if (g_trampoline != NULL) {
+ fprintf(stderr, "trampoline already patched\n");
return 0; // already patched
}
@@ -195,11 +199,13 @@
char * page = (char*)mmap(NULL, pagesize, PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_ANON | MAP_PRIVATE, 0, 0);
if (page == NULL) {
+ fprintf(stderr, "could not allocate page for trampoline\n");
return -1;
}
char * a = (char*)callee_addr;
if (_redirect_trampoline_and_back(a, page, vmprof_eval) != 0) {
+ fprintf(stderr, "could not redirect
eval->vmprof_eval->trampoline->eval+off\n");
return -1;
}
@@ -224,33 +230,4 @@
int vmp_unpatch_callee_trampoline(void * callee_addr)
{
return 0; // currently the trampoline is not removed
-
- //if (!g_patched) {
- // return -1;
- //}
-
- //int result;
- //int pagesize = sysconf(_SC_PAGESIZE);
- //errno = 0;
-
- //result = mprotect(PAGE_ALIGNED(callee_addr, pagesize), pagesize*2,
PROT_READ|PROT_WRITE);
- //if (result != 0) {
- // fprintf(stderr, "read|write protecting callee_addr\n");
- // return 1;
- //}
-
- //// copy back, assume everything is as if nothing ever happened!!
- //(void)memcpy(callee_addr, g_trampoline, g_trampoline_length);
-
- //result = mprotect(PAGE_ALIGNED(callee_addr, pagesize), pagesize*2,
PROT_READ|PROT_EXEC);
- //if (result != 0) {
- // fprintf(stderr, "read|exec protecting callee addr\n");
- // return 1;
- //}
-
- //munmap(g_trampoline, pagesize);
- //g_trampoline = NULL;
- //g_trampoline_length = 0;
-
- //return 0;
}
diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c
b/rpython/rlib/rvmprof/src/shared/vmp_stack.c
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c
@@ -0,0 +1,499 @@
+#include "vmp_stack.h"
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <stddef.h>
+#include <assert.h>
+
+#include "vmprof.h"
+#include "compat.h"
+
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+# ifdef X86_64
+# define REG_RBX UNW_X86_64_RBX
+# elif defined(X86_32)
+# define REG_RBX UNW_X86_EDI
+# endif
+#endif
+
+#ifdef __APPLE__
+#include <mach/mach.h>
+#include <mach/mach_vm.h>
+#include <mach/message.h>
+#include <mach/kern_return.h>
+#include <mach/task_info.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#elif defined(__unix__)
+#include <dlfcn.h>
+#endif
+
+#ifdef PY_TEST
+// for testing only!
+PY_EVAL_RETURN_T * vmprof_eval(PY_STACK_FRAME_T *f, int throwflag) { return
NULL; }
+#endif
+
+static int vmp_native_traces_enabled = 0;
+static intptr_t *vmp_ranges = NULL;
+static ssize_t vmp_range_count = 0;
+static int _vmp_profiles_lines = 0;
+
+void vmp_profile_lines(int lines) {
+ _vmp_profiles_lines = lines;
+}
+int vmp_profiles_python_lines(void) {
+ return _vmp_profiles_lines;
+}
+
+static PY_STACK_FRAME_T * _write_python_stack_entry(PY_STACK_FRAME_T * frame,
void ** result, int * depth)
+{
+ int len;
+ int addr;
+ int j;
+ long line;
+ char *lnotab;
+
+#ifndef RPYTHON_VMPROF // pypy does not support line profiling
+ if (vmp_profiles_python_lines()) {
+ // In the line profiling mode we save a line number for every frame.
+ // Actual line number isn't stored in the frame directly (f_lineno
+ // points to the beginning of the frame), so we need to compute it
+ // from f_lasti and f_code->co_lnotab. Here is explained what co_lnotab
+ // is:
+ //
https://svn.python.org/projects/python/trunk/Objects/lnotab_notes.txt
+
+ // NOTE: the profiling overhead can be reduced by storing co_lnotab in
the dump and
+ // moving this computation to the reader instead of doing it here.
+ lnotab = PyStr_AS_STRING(frame->f_code->co_lnotab);
+
+ if (lnotab != NULL) {
+ line = (long)frame->f_lineno;
+ addr = 0;
+
+ len = (int)PyStr_GET_SIZE(frame->f_code->co_lnotab);
+
+ for (j = 0; j < len; j += 2) {
+ addr += lnotab[j];
+ if (addr > frame->f_lasti) {
+ break;
+ }
+ line += lnotab[j+1];
+ }
+ result[*depth] = (void*) line;
+ *depth = *depth + 1;
+ } else {
+ result[*depth] = (void*) 0;
+ *depth = *depth + 1;
+ }
+ }
+ result[*depth] = (void*)CODE_ADDR_TO_UID(FRAME_CODE(frame));
+ *depth = *depth + 1;
+#else
+ //result[*depth] = (void*)CODE_ADDR_TO_UID(FRAME_CODE(frame));
+ //*depth = *depth + 1;
+
+ if (frame->kind == VMPROF_CODE_TAG) {
+ int n = *depth;
+ result[n++] = (void*)frame->kind;
+ result[n++] = (void*)frame->value;
+ *depth = n;
+ }
+#ifdef PYPY_JIT_CODEMAP
+ else if (frame->kind == VMPROF_JITTED_TAG) {
+ intptr_t pc = ((intptr_t*)(frame->value - sizeof(intptr_t)))[0];
+ n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth);
+ }
+#endif
+
+#endif
+
+ return FRAME_STEP(frame);
+}
+
+int vmp_walk_and_record_python_stack_only(PY_STACK_FRAME_T *frame, void **
result,
+ int max_depth, int depth, intptr_t pc)
+{
+ while (depth < max_depth && frame) {
+ frame = _write_python_stack_entry(frame, result, &depth);
+ }
+ return depth;
+}
+
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+int _write_native_stack(void* addr, void ** result, int depth) {
+ if (vmp_profiles_python_lines()) {
+ // even if we do not log a python stack frame,
+ // we must keep the profile readable
+ result[depth++] = 0;
+ }
+ result[depth++] = addr;
+ return depth;
+}
+#endif
+
+int vmp_walk_and_record_stack(PY_STACK_FRAME_T *frame, void ** result,
+ int max_depth, int native_skip, intptr_t pc) {
+
+//#ifdef PYPY_JIT_CODEMAP
+// intptr_t codemap_addr;
+// if (pypy_find_codemap_at_addr((intptr_t)pc, &codemap_addr)) {
+// // the bottom part is jitted, means we can fill up the first part
+// // from the JIT
+// depth = vmprof_write_header_for_jit_addr(result, depth, pc,
max_depth);
+// frame = FRAME_STEP(frame); // skip the first item as it contains
garbage
+// }
+//#endif
+
+ // called in signal handler
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ intptr_t func_addr;
+ unw_cursor_t cursor;
+ unw_context_t uc;
+ unw_proc_info_t pip;
+
+ if (!vmp_native_enabled()) {
+ return vmp_walk_and_record_python_stack_only(frame, result, max_depth,
0, pc);
+ }
+
+ unw_getcontext(&uc);
+ int ret = unw_init_local(&cursor, &uc);
+ if (ret < 0) {
+ // could not initialize lib unwind cursor and context
+ return -1;
+ }
+
+ while (native_skip > 0) {
+ int err = unw_step(&cursor);
+ if (err <= 0) {
+ return 0;
+ }
+ native_skip--;
+ }
+
+ int depth = 0;
+ PY_STACK_FRAME_T * top_most_frame = frame;
+ while (depth < max_depth) {
+ unw_get_proc_info(&cursor, &pip);
+
+ func_addr = pip.start_ip;
+ //if (func_addr == 0) {
+ // unw_word_t rip = 0;
+ // if (unw_get_reg(&cursor, UNW_REG_IP, &rip) < 0) {
+ // printf("failed failed failed\n");
+ // }
+ // func_addr = rip;
+ // printf("func_addr is 0, now %p\n", rip);
+ //}
+
+
+ if ((void*)pip.start_ip == (void*)vmprof_eval) {
+ // yes we found one stack entry of the python frames!
+ unw_word_t rbx = 0;
+ if (unw_get_reg(&cursor, REG_RBX, &rbx) < 0) {
+ break;
+ }
+ if (rbx != (unw_word_t)top_most_frame) {
+                // uh we are screwed! the ip indicates we have context
+ // to a PyEval_EvalFrameEx function, but when we tried to
retrieve
+ // the stack located py frame it has a different address than
the
+ // current top_most_frame
+ return 0;
+ } else {
+ if (top_most_frame == NULL) {
+ break;
+ }
+ top_most_frame = _write_python_stack_entry(top_most_frame,
result, &depth);
+ }
+ } else if (vmp_ignore_ip((intptr_t)func_addr)) {
+ // this is an instruction pointer that should be ignored,
+ // (that is any function name in the mapping range of
+        // cpython, but of course not extensions in site-packages))
+ //printf("ignoring %s\n", info.dli_sname);
+ } else {
+ // mark native routines with the first bit set,
+ // this is possible because compiler align to 8 bytes.
+ //
+ depth = _write_native_stack((void*)(func_addr | 0x1), result,
depth);
+ }
+
+ int err = unw_step(&cursor);
+ if (err <= 0) {
+ // on mac this breaks on Py_Main?
+ break;
+ }
+ }
+
+ if (top_most_frame == NULL) {
+ return depth;
+ }
+    // Whenever the trampoline is inserted, there might be a few python
+    // stack levels that do not have the trampoline!
+    // they should not be consumed, because they let native symbols flow forward.
+ return depth; //vmp_walk_and_record_python_stack_only(top_most_frame,
result, max_depth, depth);
+#else
+ return vmp_walk_and_record_python_stack_only(frame, result, max_depth, 0,
pc);
+#endif
+}
+
+int vmp_native_enabled(void) {
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ return vmp_native_traces_enabled;
+#else
+ return 0;
+#endif
+}
+
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+int _ignore_symbols_from_path(const char * name) {
+ // which symbols should not be considered while walking
+ // the native stack?
+ if (strstr(name, "python") != NULL &&
+#ifdef __unix__
+ strstr(name, ".so\n") == NULL
+#elif defined(__APPLE__)
+ strstr(name, ".so") == NULL
+#endif
+ ) {
+ return 1;
+ }
+ return 0;
+}
+
+int _reset_vmp_ranges(void) {
+ // initially 10 (start, stop) entries!
+ int max_count = 10;
+ vmp_range_count = 0;
+ if (vmp_ranges != NULL) { free(vmp_ranges); }
+ vmp_ranges = malloc(max_count * sizeof(intptr_t));
+ return max_count;
+}
+
+
+int _resize_ranges(intptr_t ** cursor, int max_count) {
+ ptrdiff_t diff = (*cursor - vmp_ranges);
+ if (diff + 2 > max_count) {
+ max_count *= 2;
+ vmp_ranges = realloc(vmp_ranges, max_count*sizeof(intptr_t));
+ *cursor = vmp_ranges + diff;
+ }
+ return max_count;
+}
+
+intptr_t * _add_to_range(intptr_t * cursor, intptr_t start, intptr_t end) {
+ if (cursor[0] == start) {
+ // the last range is extended, this reduces the entry count
+ // which makes the querying faster
+ cursor[0] = end;
+ } else {
+ if (cursor != vmp_ranges) {
+ // not pointing to the first entry
+ cursor++;
+ }
+ cursor[0] = start;
+ cursor[1] = end;
+ vmp_range_count += 2;
+ cursor++;
+ }
+ return cursor;
+}
+
+#ifdef __unix__
+int vmp_read_vmaps(const char * fname) {
+
+ FILE * fd = fopen(fname, "rb");
+ if (fd == NULL) {
+ return 0;
+ }
+ char * saveptr;
+ char * line = NULL;
+ char * he = NULL;
+ char * name;
+ char *start_hex = NULL, *end_hex = NULL;
+ size_t n = 0;
+ ssize_t size;
+ intptr_t start, end;
+
+ // assumptions to be verified:
+ // 1) /proc/self/maps is ordered ascending by start address
+ // 2) libraries that contain the name 'python' are considered
+ // candidates in the mapping to be ignored
+ // 3) libraries containing site-packages are not considered
+ // candidates
+
+ int max_count = _reset_vmp_ranges();
+ intptr_t * cursor = vmp_ranges;
+ cursor[0] = -1;
+ while ((size = getline(&line, &n, fd)) >= 0) {
+ assert(line != NULL);
+ start_hex = strtok_r(line, "-", &saveptr);
+ if (start_hex == NULL) { continue; }
+ start = strtoll(start_hex, &he, 16);
+ end_hex = strtok_r(NULL, " ", &saveptr);
+ if (end_hex == NULL) { continue; }
+ end = strtoll(end_hex, &he, 16);
+ // skip over flags, ...
+ strtok_r(NULL, " ", &saveptr);
+ strtok_r(NULL, " ", &saveptr);
+ strtok_r(NULL, " ", &saveptr);
+ strtok_r(NULL, " ", &saveptr);
+
+ name = saveptr;
+ if (_ignore_symbols_from_path(name)) {
+ max_count = _resize_ranges(&cursor, max_count);
+ cursor = _add_to_range(cursor, start, end);
+ }
+ free(line);
+ line = NULL;
+ n = 0;
+ }
+
+ fclose(fd);
+ return 1;
+}
+#endif
+
+#ifdef __APPLE__
+int vmp_read_vmaps(const char * fname) {
+ kern_return_t kr;
+ task_t task;
+ mach_vm_address_t addr;
+ mach_vm_size_t vmsize;
+ vm_region_top_info_data_t topinfo;
+ mach_msg_type_number_t count;
+ memory_object_name_t obj;
+ int ret = 0;
+ pid_t pid;
+
+ pid = getpid();
+ kr = task_for_pid(mach_task_self(), pid, &task);
+ if (kr != KERN_SUCCESS) {
+ goto teardown;
+ }
+
+ addr = 0;
+ int max_count = _reset_vmp_ranges();
+ intptr_t * cursor = vmp_ranges;
+ cursor[0] = -1;
+
+ do {
+ // extract the top info using vm_region
+ count = VM_REGION_TOP_INFO_COUNT;
+ vmsize = 0;
+ kr = mach_vm_region(task, &addr, &vmsize, VM_REGION_TOP_INFO,
+ (vm_region_info_t)&topinfo, &count, &obj);
+ if (kr == KERN_SUCCESS) {
+ vm_address_t start = addr, end = addr + vmsize;
+ // dladdr now gives the path of the shared object
+ Dl_info info;
+ if (dladdr((const void*)start, &info) == 0) {
+ // could not find image containing start
+ addr += vmsize;
+ continue;
+ }
+ if (_ignore_symbols_from_path(info.dli_fname)) {
+ // realloc if the chunk is to small
+ max_count = _resize_ranges(&cursor, max_count);
+ cursor = _add_to_range(cursor, start, end);
+ }
+ addr = addr + vmsize;
+ } else if (kr != KERN_INVALID_ADDRESS) {
+ goto teardown;
+ }
+ } while (kr == KERN_SUCCESS);
+
+ ret = 1;
+
+teardown:
+ if (task != MACH_PORT_NULL) {
+ mach_port_deallocate(mach_task_self(), task);
+ }
+ return ret;
+}
+#endif
+
+int vmp_native_enable(void) {
+ vmp_native_traces_enabled = 1;
+
+#if defined(__unix__)
+ return vmp_read_vmaps("/proc/self/maps");
+#elif defined(__APPLE__)
+ return vmp_read_vmaps(NULL);
+#endif
+}
+
+void vmp_native_disable(void) {
+ vmp_native_traces_enabled = 0;
+ if (vmp_ranges != NULL) {
+ free(vmp_ranges);
+ vmp_ranges = NULL;
+ }
+ vmp_range_count = 0;
+}
+
+int vmp_ignore_ip(intptr_t ip) {
+ int i = vmp_binary_search_ranges(ip, vmp_ranges, vmp_range_count);
+ if (i == -1) {
+ return 0;
+ }
+
+ assert((i & 1) == 0 && "returned index MUST be even");
+
+ intptr_t v = vmp_ranges[i];
+ intptr_t v2 = vmp_ranges[i+1];
+ return v <= ip && ip <= v2;
+}
+
+int vmp_binary_search_ranges(intptr_t ip, intptr_t * l, int count) {
+ intptr_t * r = l + count;
+ intptr_t * ol = l;
+ intptr_t * or = r-1;
+ while (1) {
+ ptrdiff_t i = (r-l)/2;
+ if (i == 0) {
+ if (l == ol && *l > ip) {
+ // at the start
+ return -1;
+ } else if (l == or && *l < ip) {
+ // at the end
+ return -1;
+ } else {
+ // we found the lower bound
+ i = l - ol;
+ if ((i & 1) == 1) {
+ return i-1;
+ }
+ return i;
+ }
+ }
+ intptr_t * m = l + i;
+ if (ip < *m) {
+ r = m;
+ } else {
+ l = m;
+ }
+ }
+ return -1;
+}
+
+int vmp_ignore_symbol_count(void) {
+ return vmp_range_count;
+}
+
+intptr_t * vmp_ignore_symbols(void) {
+ return vmp_ranges;
+}
+
+void vmp_set_ignore_symbols(intptr_t * symbols, int count) {
+ vmp_ranges = symbols;
+ vmp_range_count = count;
+}
+#endif
diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.h
b/rpython/rlib/rvmprof/src/shared/vmp_stack.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "vmprof.h"
+
+int vmp_walk_and_record_stack(PY_STACK_FRAME_T * frame, void **data,
+ int max_depth, int native_skip, intptr_t pc);
+
+int vmp_native_enabled(void);
+int vmp_native_enable(void);
+int vmp_ignore_ip(intptr_t ip);
+int vmp_binary_search_ranges(intptr_t ip, intptr_t * l, int count);
+int vmp_native_symbols_read(void);
+void vmp_profile_lines(int lines);
+int vmp_profiles_python_lines(void);
+
+int vmp_ignore_symbol_count(void);
+intptr_t * vmp_ignore_symbols(void);
+void vmp_set_ignore_symbols(intptr_t * symbols, int count);
+void vmp_native_disable(void);
+
+#ifdef __unix__
+int vmp_read_vmaps(const char * fname);
+#endif
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof.h
b/rpython/rlib/rvmprof/src/shared/vmprof.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof.h
@@ -43,6 +43,7 @@
#define FRAME_CODE(f) f->
PY_EVAL_RETURN_T * vmprof_eval(PY_STACK_FRAME_T *f, int throwflag);
#else
+#define RPY_EXTERN
// for cpython
#include "_vmprof.h"
#include <Python.h>
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h
b/rpython/rlib/rvmprof/src/shared/vmprof_common.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h
@@ -141,6 +141,7 @@
# define _Py_atomic_load_relaxed(pp) (*(pp))
#endif
+#ifdef RPYTHON_VMPROF
#ifndef RPYTHON_LL2CTYPES
static PY_STACK_FRAME_T *get_vmprof_stack(void)
{
@@ -158,7 +159,6 @@
}
#endif
-#ifdef RPYTHON_VMPROF
RPY_EXTERN
intptr_t vmprof_get_traceback(void *stack, void *ucontext,
intptr_t *result_p, intptr_t result_length)
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_main.h
b/rpython/rlib/rvmprof/src/shared/vmprof_main.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_main.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_main.h
@@ -92,12 +92,13 @@
int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth,
intptr_t pc)
{
PY_STACK_FRAME_T * frame;
+#ifdef RPYTHON_VMPROF
+    // NOTE(review): 'frame' is read uninitialized on the next line (UB) —
+    // should this be 'frame = (PY_STACK_FRAME_T*)current;' instead? The
+    // caller passes get_vmprof_stack() as 'current'. Please confirm.
+ current = (PY_STACK_FRAME_T*)frame;
+#else
if (!current) {
return 0;
}
-#ifdef RPYTHON_VMPROF
- frame = get_vmprof_stack();
-#else
frame = current->frame;
#endif
// skip over
@@ -135,7 +136,7 @@
#ifdef RPYTHON_VMPROF
depth = get_stack_trace(get_vmprof_stack(), st->stack, MAX_STACK_DEPTH-1,
(intptr_t)GetPC(uc));
#else
- depth = get_stack_trace(tstate, st->stack, MAX_STACK_DEPTH-1, NULL);
+ depth = get_stack_trace(tstate, st->stack, MAX_STACK_DEPTH-1,
(intptr_t)NULL);
#endif
if (depth == 0) {
return 0;
@@ -264,14 +265,18 @@
static void atfork_disable_timer(void) {
if (profile_interval_usec > 0) {
remove_sigprof_timer();
+#ifndef RPYTHON_VMPROF
is_enabled = 0;
+#endif
}
}
static void atfork_enable_timer(void) {
if (profile_interval_usec > 0) {
install_sigprof_timer();
+#ifndef RPYTHON_VMPROF
is_enabled = 1;
+#endif
}
}
@@ -302,9 +307,56 @@
return 0;
}
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+void init_cpyprof(int native)
+{
+ // skip this if native should not be enabled
+ if (!native) {
+ vmp_native_disable();
+ return;
+ }
+#if CPYTHON_HAS_FRAME_EVALUATION
+ PyThreadState *tstate = PyThreadState_GET();
+ tstate->interp->eval_frame = vmprof_eval;
+ _default_eval_loop = _PyEval_EvalFrameDefault;
+#elif defined(RPYTHON_VMPROF)
+ // TODO nothing?
+#else
+ if (vmp_patch_callee_trampoline(PyEval_EvalFrameEx,
+ vmprof_eval, (void*)&_default_eval_loop) == 0) {
+ } else {
+        fprintf(stderr, "FATAL: could not insert trampoline, try with
--no-native\n");
+ // TODO dump the first few bytes and tell them to create an issue!
+ exit(-1);
+ }
+#endif
+ vmp_native_enable();
+}
+
+static void disable_cpyprof(void)
+{
+ vmp_native_disable();
+#if CPYTHON_HAS_FRAME_EVALUATION
+ PyThreadState *tstate = PyThreadState_GET();
+ tstate->interp->eval_frame = _PyEval_EvalFrameDefault;
+#elif defined(RPYTHON_VMPROF)
+ // TODO nothing?
+#else
+ if (vmp_unpatch_callee_trampoline(PyEval_EvalFrameEx) > 0) {
+ fprintf(stderr, "FATAL: could not remove trampoline\n");
+ exit(-1);
+ }
+#endif
+ dump_native_symbols(vmp_profile_fileno());
+}
+#endif
+
RPY_EXTERN
-int vmprof_enable(int memory)
+int vmprof_enable(int memory, int native)
{
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ init_cpyprof(native);
+#endif
assert(vmp_profile_fileno() >= 0);
assert(prepare_interval_usec > 0);
profile_interval_usec = prepare_interval_usec;
@@ -326,7 +378,7 @@
}
-static int close_profile(void)
+int close_profile(void)
{
(void)vmp_write_time_now(MARKER_TRAILER);
@@ -341,6 +393,9 @@
{
vmprof_ignore_signals(1);
profile_interval_usec = 0;
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ disable_cpyprof();
+#endif
if (remove_sigprof_timer() == -1)
return -1;
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit