Author: Richard Plangger <[email protected]>
Branch: vmprof-native
Changeset: r89998:dc4c3b35b94c
Date: 2017-02-07 14:29 +0100
http://bitbucket.org/pypy/pypy/changeset/dc4c3b35b94c/

Log:    passing some more vmprof tests

diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py
--- a/rpython/rlib/rvmprof/cintf.py
+++ b/rpython/rlib/rvmprof/cintf.py
@@ -18,7 +18,7 @@
 UDIS86 = SHARED.join('libudis86')
 BACKTRACE = SHARED.join('libbacktrace')
 
-compile_extra = ['-DRPYTHON_LL2CTYPES','-DRPYTHON_VMPROF']
+compile_extra = ['-DRPYTHON_VMPROF', '-g', '-O1']
 if sys.platform.startswith('linux'):
     separate_module_files = [
        BACKTRACE.join('backtrace.c'),
@@ -66,14 +66,17 @@
 
 
 def setup():
-    platform.verify_eci(ExternalCompilationInfo(**eci_kwds))
+    eci_kwds['compile_extra'].append('-DRPYTHON_LL2CTYPES')
+    platform.verify_eci(ExternalCompilationInfo(
+                        **eci_kwds))
 
     eci = global_eci
     vmprof_init = rffi.llexternal("vmprof_init",
                                   [rffi.INT, rffi.DOUBLE, rffi.INT, rffi.INT,
                                    rffi.CCHARP, rffi.INT],
                                   rffi.CCHARP, compilation_info=eci)
-    vmprof_enable = rffi.llexternal("vmprof_enable", [rffi.INT], rffi.INT,
+    vmprof_enable = rffi.llexternal("vmprof_enable", [rffi.INT, rffi.INT],
+                                    rffi.INT,
                                     compilation_info=eci,
                                     save_err=rffi.RFFI_SAVE_ERRNO)
     vmprof_disable = rffi.llexternal("vmprof_disable", [], rffi.INT,
diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py
--- a/rpython/rlib/rvmprof/rvmprof.py
+++ b/rpython/rlib/rvmprof/rvmprof.py
@@ -10,6 +10,8 @@
 
 MAX_FUNC_NAME = 1023
 
+PLAT_WINDOWS = sys.platform == 'win32'
+
 # ____________________________________________________________
 
 # keep in sync with vmprof_stack.h
@@ -132,6 +134,8 @@
         if self.is_enabled:
             raise VMProfError("vmprof is already enabled")
 
+        if PLAT_WINDOWS:
+            native = 0 # force disabled on Windows
         lines = 0 # not supported on PyPy currently
 
         p_error = self.cintf.vmprof_init(fileno, interval, lines, memory, 
"pypy", native)
@@ -139,7 +143,7 @@
             raise VMProfError(rffi.charp2str(p_error))
 
         self._gather_all_code_objs()
-        res = self.cintf.vmprof_enable(memory)
+        res = self.cintf.vmprof_enable(memory, native)
         if res < 0:
             raise VMProfError(os.strerror(rposix.get_saved_errno()))
         self.is_enabled = True
diff --git a/rpython/rlib/rvmprof/src/rvmprof.c 
b/rpython/rlib/rvmprof/src/rvmprof.c
--- a/rpython/rlib/rvmprof/src/rvmprof.c
+++ b/rpython/rlib/rvmprof/src/rvmprof.c
@@ -3,8 +3,6 @@
 #ifdef RPYTHON_LL2CTYPES
    /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */
 
-static volatile int is_enabled = 0;
-
 #else
 #  include "common_header.h"
 #  include "structdef.h"
@@ -47,3 +45,17 @@
     return NULL; // TODO _default_eval_loop(f, throwflag);
 }
 #endif
+
+void dump_native_symbols(int fileno)
+{
+// TODO    PyObject * mod = NULL;
+// TODO
+// TODO    mod = PyImport_ImportModuleNoBlock("vmprof");
+// TODO    if (mod == NULL)
+// TODO        goto error;
+// TODO
+// TODO    PyObject_CallMethod(mod, "dump_native_symbols", "(l)", fileno);
+// TODO
+// TODOerror:
+// TODO    Py_XDECREF(mod);
+}
diff --git a/rpython/rlib/rvmprof/src/rvmprof.h 
b/rpython/rlib/rvmprof/src/rvmprof.h
--- a/rpython/rlib/rvmprof/src/rvmprof.h
+++ b/rpython/rlib/rvmprof/src/rvmprof.h
@@ -25,7 +25,7 @@
 RPY_EXTERN char *vmprof_init(int fd, double interval, int memory,
                      int lines, const char *interp_name, int native);
 RPY_EXTERN void vmprof_ignore_signals(int);
-RPY_EXTERN int vmprof_enable(int memory);
+RPY_EXTERN int vmprof_enable(int memory, int native);
 RPY_EXTERN int vmprof_disable(void);
 RPY_EXTERN int vmprof_register_virtual_function(char *, long, int);
 RPY_EXTERN void* vmprof_stack_new(void);
diff --git a/rpython/rlib/rvmprof/src/shared/_vmprof.c 
b/rpython/rlib/rvmprof/src/shared/_vmprof.c
--- a/rpython/rlib/rvmprof/src/shared/_vmprof.c
+++ b/rpython/rlib/rvmprof/src/shared/_vmprof.c
@@ -12,6 +12,8 @@
 #include "_vmprof.h"
 
 static volatile int is_enabled = 0;
+static destructor Original_code_dealloc = 0;
+static PyObject* (*_default_eval_loop)(PyFrameObject *, int) = 0;
 
 #if VMPROF_UNIX
 #include "trampoline.h"
@@ -23,9 +25,6 @@
 #endif
 #include "vmp_stack.h"
 
-static destructor Original_code_dealloc = 0;
-PyObject* (*_default_eval_loop)(PyFrameObject *, int) = 0;
-
 #ifdef VMPROF_UNIX
 #ifdef __clang__
 __attribute__((disable_tail_calls))
@@ -150,30 +149,6 @@
     Original_code_dealloc(co);
 }
 
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
-static void init_cpyprof(int native)
-{
-    // skip this if native should not be enabled
-    if (!native) {
-        vmp_native_disable();
-        return;
-    }
-#if CPYTHON_HAS_FRAME_EVALUATION
-    PyThreadState *tstate = PyThreadState_GET();
-    tstate->interp->eval_frame = vmprof_eval;
-    _default_eval_loop = _PyEval_EvalFrameDefault;
-#else
-    if (vmp_patch_callee_trampoline(PyEval_EvalFrameEx,
-                vmprof_eval, (void*)&_default_eval_loop) == 0) {
-    } else {
-        fprintf(stderr, "FATAL: could not insert trampline, try with 
--no-native\n");
-        // TODO dump the first few bytes and tell them to create an issue!
-        exit(-1);
-    }
-#endif
-    vmp_native_enable();
-}
-#endif
 
 void dump_native_symbols(int fileno)
 {
@@ -189,22 +164,6 @@
     Py_XDECREF(mod);
 }
 
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
-static void disable_cpyprof(void)
-{
-    vmp_native_disable();
-#if CPYTHON_HAS_FRAME_EVALUATION
-    PyThreadState *tstate = PyThreadState_GET();
-    tstate->interp->eval_frame = _PyEval_EvalFrameDefault;
-#else
-    if (vmp_unpatch_callee_trampoline(PyEval_EvalFrameEx) > 0) {
-        fprintf(stderr, "FATAL: could not remove trampoline\n");
-        exit(-1);
-    }
-#endif
-    dump_native_symbols(vmp_profile_fileno());
-}
-#endif
 
 
 static PyObject *enable_vmprof(PyObject* self, PyObject *args)
@@ -229,10 +188,6 @@
 
     vmp_profile_lines(lines);
 
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
-    init_cpyprof(native);
-#endif
-
     if (!Original_code_dealloc) {
         Original_code_dealloc = PyCode_Type.tp_dealloc;
         PyCode_Type.tp_dealloc = &cpyprof_code_dealloc;
@@ -244,7 +199,7 @@
         return NULL;
     }
 
-    if (vmprof_enable(memory) < 0) {
+    if (vmprof_enable(memory, native) < 0) {
         PyErr_SetFromErrno(PyExc_OSError);
         return NULL;
     }
@@ -265,9 +220,6 @@
     is_enabled = 0;
     vmprof_ignore_signals(1);
     emit_all_code_objects();
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
-    disable_cpyprof();
-#endif
 
     if (vmprof_disable() < 0) {
         PyErr_SetFromErrno(PyExc_OSError);
@@ -319,7 +271,7 @@
         vmprof_ignore_signals(0);
         return NULL;
     }
-    entry_count = vmp_walk_and_record_stack(tstate->frame, m, 
MAX_STACK_DEPTH-1, 0);
+    entry_count = vmp_walk_and_record_stack(tstate->frame, m, 
MAX_STACK_DEPTH-1, 0, 0);
 
     for (i = 0; i < entry_count; i++) {
         routine_ip = m[i];
diff --git a/rpython/rlib/rvmprof/src/shared/_vmprof.h 
b/rpython/rlib/rvmprof/src/shared/_vmprof.h
--- a/rpython/rlib/rvmprof/src/shared/_vmprof.h
+++ b/rpython/rlib/rvmprof/src/shared/_vmprof.h
@@ -8,6 +8,7 @@
 #else
 #include <inttypes.h>
 #include <stdint.h>
+#include <stddef.h>
 #endif
 
 /**
diff --git a/rpython/rlib/rvmprof/src/shared/symboltable.c 
b/rpython/rlib/rvmprof/src/shared/symboltable.c
--- a/rpython/rlib/rvmprof/src/shared/symboltable.c
+++ b/rpython/rlib/rvmprof/src/shared/symboltable.c
@@ -9,19 +9,7 @@
 
 #include <dlfcn.h>
 #ifdef VMPROF_LINUX
-#define _GNU_SOURCE 1
 #include <link.h>
-// copied from man page...
-typedef struct {
-   const char *dli_fname;  /* Pathname of shared object that
-                              contains address */
-   void       *dli_fbase;  /* Base address at which shared
-                              object is loaded */
-   const char *dli_sname;  /* Name of symbol whose definition
-                              overlaps addr */
-   void       *dli_saddr;  /* Exact address of symbol named
-                              in dli_sname */
-} Dl_info;
 #endif
 
 #ifdef _PY_TEST
@@ -232,7 +220,7 @@
         name[name_len-1] = 0;
     }
     lookup_vmprof_debug_info(name, info.dli_fbase, srcfile, srcfile_len, 
lineno);
-#elif defined(VMPROF_LINUX)
+#elif defined(__unix__)
     if (bstate == NULL) {
         bstate = backtrace_create_state (NULL, 1, backtrace_error_cb, NULL);
     }
diff --git a/rpython/rlib/rvmprof/src/shared/symboltable.h 
b/rpython/rlib/rvmprof/src/shared/symboltable.h
--- a/rpython/rlib/rvmprof/src/shared/symboltable.h
+++ b/rpython/rlib/rvmprof/src/shared/symboltable.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#define _GNU_SOURCE 1
+
 /**
  * Extract all the known symbols from the current process and
  * log them to the file descriptor. To read them see binary.py funcs:
diff --git a/rpython/rlib/rvmprof/src/shared/trampoline.c 
b/rpython/rlib/rvmprof/src/shared/trampoline.c
--- a/rpython/rlib/rvmprof/src/shared/trampoline.c
+++ b/rpython/rlib/rvmprof/src/shared/trampoline.c
@@ -1,7 +1,7 @@
 #include "trampoline.h"
 
+#include "vmprof.h"
 #include "machine.h"
-#include "_vmprof.h"
 
 #define _GNU_SOURCE 1
 #include <string.h>
@@ -141,6 +141,9 @@
     while (bytes < needed_bytes) {
         unsigned int res = vmp_machine_code_instr_length(ptr);
         if (res == 0) {
+            fprintf(stderr, "could not determine length of instr for 
trampoline\n");
+            fprintf(stderr, " %x %x %x %x %x\n", ptr[0], ptr[1],
+                            ptr[2], ptr[3], ptr[4]);
             return 1;
         }
 #ifdef X86_32
@@ -180,6 +183,7 @@
     int pagesize;
 
     if (g_trampoline != NULL) {
+        fprintf(stderr, "trampoline already patched\n");
         return 0; // already patched
     }
 
@@ -195,11 +199,13 @@
     char * page = (char*)mmap(NULL, pagesize, PROT_READ|PROT_WRITE|PROT_EXEC,
                               MAP_ANON | MAP_PRIVATE, 0, 0);
     if (page == NULL) {
+        fprintf(stderr, "could not allocate page for trampoline\n");
         return -1;
     }
 
     char * a = (char*)callee_addr;
     if (_redirect_trampoline_and_back(a, page, vmprof_eval) != 0) {
+        fprintf(stderr, "could not redirect 
eval->vmprof_eval->trampoline->eval+off\n");
         return -1;
     }
 
@@ -224,33 +230,4 @@
 int vmp_unpatch_callee_trampoline(void * callee_addr)
 {
     return 0; // currently the trampoline is not removed
-
-    //if (!g_patched) {
-    //    return -1;
-    //}
-
-    //int result;
-    //int pagesize = sysconf(_SC_PAGESIZE);
-    //errno = 0;
-
-    //result = mprotect(PAGE_ALIGNED(callee_addr, pagesize), pagesize*2, 
PROT_READ|PROT_WRITE);
-    //if (result != 0) {
-    //    fprintf(stderr, "read|write protecting callee_addr\n");
-    //    return 1;
-    //}
-
-    //// copy back, assume everything is as if nothing ever happened!!
-    //(void)memcpy(callee_addr, g_trampoline, g_trampoline_length);
-
-    //result = mprotect(PAGE_ALIGNED(callee_addr, pagesize), pagesize*2, 
PROT_READ|PROT_EXEC);
-    //if (result != 0) {
-    //    fprintf(stderr, "read|exec protecting callee addr\n");
-    //    return 1;
-    //}
-
-    //munmap(g_trampoline, pagesize);
-    //g_trampoline = NULL;
-    //g_trampoline_length = 0;
-
-    //return 0;
 }
diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c 
b/rpython/rlib/rvmprof/src/shared/vmp_stack.c
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c
@@ -0,0 +1,499 @@
+#include "vmp_stack.h"
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <stddef.h>
+#include <assert.h>
+
+#include "vmprof.h"
+#include "compat.h"
+
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+#  ifdef X86_64
+#    define REG_RBX UNW_X86_64_RBX
+#  elif defined(X86_32)
+#    define REG_RBX UNW_X86_EDI
+#  endif
+#endif
+
+#ifdef __APPLE__
+#include <mach/mach.h>
+#include <mach/mach_vm.h>
+#include <mach/message.h>
+#include <mach/kern_return.h>
+#include <mach/task_info.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#elif defined(__unix__)
+#include <dlfcn.h>
+#endif
+
+#ifdef PY_TEST
+// for testing only!
+PY_EVAL_RETURN_T * vmprof_eval(PY_STACK_FRAME_T *f, int throwflag) { return 
NULL; }
+#endif
+
+static int vmp_native_traces_enabled = 0;
+static intptr_t *vmp_ranges = NULL;
+static ssize_t vmp_range_count = 0;
+static int _vmp_profiles_lines = 0;
+
+void vmp_profile_lines(int lines) {
+    _vmp_profiles_lines = lines;
+}
+int vmp_profiles_python_lines(void) {
+    return _vmp_profiles_lines;
+}
+
+static PY_STACK_FRAME_T * _write_python_stack_entry(PY_STACK_FRAME_T * frame, 
void ** result, int * depth)
+{
+    int len;
+    int addr;
+    int j;
+    long line;
+    char *lnotab;
+
+#ifndef RPYTHON_VMPROF // pypy does not support line profiling
+    if (vmp_profiles_python_lines()) {
+        // In the line profiling mode we save a line number for every frame.
+        // Actual line number isn't stored in the frame directly (f_lineno
+        // points to the beginning of the frame), so we need to compute it
+        // from f_lasti and f_code->co_lnotab. Here is explained what co_lnotab
+        // is:
+        // 
https://svn.python.org/projects/python/trunk/Objects/lnotab_notes.txt
+
+        // NOTE: the profiling overhead can be reduced by storing co_lnotab in 
the dump and
+        // moving this computation to the reader instead of doing it here.
+        lnotab = PyStr_AS_STRING(frame->f_code->co_lnotab);
+
+        if (lnotab != NULL) {
+            line = (long)frame->f_lineno;
+            addr = 0;
+
+            len = (int)PyStr_GET_SIZE(frame->f_code->co_lnotab);
+
+            for (j = 0; j < len; j += 2) {
+                addr += lnotab[j];
+                if (addr > frame->f_lasti) {
+                    break;
+                }
+                line += lnotab[j+1];
+            }
+            result[*depth] = (void*) line;
+            *depth = *depth + 1;
+        } else {
+            result[*depth] = (void*) 0;
+            *depth = *depth + 1;
+        }
+    }
+    result[*depth] = (void*)CODE_ADDR_TO_UID(FRAME_CODE(frame));
+    *depth = *depth + 1;
+#else
+    //result[*depth] = (void*)CODE_ADDR_TO_UID(FRAME_CODE(frame));
+    //*depth = *depth + 1;
+
+    if (frame->kind == VMPROF_CODE_TAG) {
+        int n = *depth;
+        result[n++] = (void*)frame->kind;
+        result[n++] = (void*)frame->value;
+        *depth = n;
+    }
+#ifdef PYPY_JIT_CODEMAP
+    else if (frame->kind == VMPROF_JITTED_TAG) {
+        intptr_t pc = ((intptr_t*)(frame->value - sizeof(intptr_t)))[0];
+        n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth);
+    }
+#endif
+
+#endif
+
+    return FRAME_STEP(frame);
+}
+
+int vmp_walk_and_record_python_stack_only(PY_STACK_FRAME_T *frame, void ** 
result,
+                                     int max_depth, int depth, intptr_t pc)
+{
+    while (depth < max_depth && frame) {
+        frame = _write_python_stack_entry(frame, result, &depth);
+    }
+    return depth;
+}
+
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+int _write_native_stack(void* addr, void ** result, int depth) {
+    if (vmp_profiles_python_lines()) {
+        // even if we do not log a python stack frame,
+        // we must keep the profile readable
+        result[depth++] = 0;
+    }
+    result[depth++] = addr;
+    return depth;
+}
+#endif
+
+int vmp_walk_and_record_stack(PY_STACK_FRAME_T *frame, void ** result,
+                              int max_depth, int native_skip, intptr_t pc) {
+
+//#ifdef PYPY_JIT_CODEMAP
+//    intptr_t codemap_addr;
+//    if (pypy_find_codemap_at_addr((intptr_t)pc, &codemap_addr)) {
+//        // the bottom part is jitted, means we can fill up the first part
+//        // from the JIT
+//        depth = vmprof_write_header_for_jit_addr(result, depth, pc, 
max_depth);
+//        frame = FRAME_STEP(frame); // skip the first item as it contains 
garbage
+//    }
+//#endif
+
+    // called in signal handler
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+    intptr_t func_addr;
+    unw_cursor_t cursor;
+    unw_context_t uc;
+    unw_proc_info_t pip;
+
+    if (!vmp_native_enabled()) {
+        return vmp_walk_and_record_python_stack_only(frame, result, max_depth, 
0, pc);
+    }
+
+    unw_getcontext(&uc);
+    int ret = unw_init_local(&cursor, &uc);
+    if (ret < 0) {
+        // could not initialize lib unwind cursor and context
+        return -1;
+    }
+
+    while (native_skip > 0) {
+        int err = unw_step(&cursor);
+        if (err <= 0) {
+            return 0;
+        }
+        native_skip--;
+    }
+
+    int depth = 0;
+    PY_STACK_FRAME_T * top_most_frame = frame;
+    while (depth < max_depth) {
+        unw_get_proc_info(&cursor, &pip);
+
+        func_addr = pip.start_ip;
+        //if (func_addr == 0) {
+        //    unw_word_t rip = 0;
+        //    if (unw_get_reg(&cursor, UNW_REG_IP, &rip) < 0) {
+        //        printf("failed failed failed\n");
+        //    }
+        //    func_addr = rip;
+        //    printf("func_addr is 0, now %p\n", rip);
+        //}
+
+
+        if ((void*)pip.start_ip == (void*)vmprof_eval) {
+            // yes we found one stack entry of the python frames!
+            unw_word_t rbx = 0;
+            if (unw_get_reg(&cursor, REG_RBX, &rbx) < 0) {
+                break;
+            }
+            if (rbx != (unw_word_t)top_most_frame) {
+                // uh we are screwed! the ip indicates we have context
+                // to a PyEval_EvalFrameEx function, but when we tried to 
retrieve
+                // the stack located py frame it has a different address than 
the
+                // current top_most_frame
+                return 0;
+            } else {
+                if (top_most_frame == NULL) {
+                    break;
+                }
+                top_most_frame = _write_python_stack_entry(top_most_frame, 
result, &depth);
+            }
+        } else if (vmp_ignore_ip((intptr_t)func_addr)) {
+            // this is an instruction pointer that should be ignored,
+            // (that is any function name in the mapping range of
+            //  cpython, but of course not extensions in site-packages))
+            //printf("ignoring %s\n", info.dli_sname);
+        } else {
+            // mark native routines with the first bit set,
+            // this is possible because compiler align to 8 bytes.
+            //
+            depth = _write_native_stack((void*)(func_addr | 0x1), result, 
depth);
+        }
+
+        int err = unw_step(&cursor);
+        if (err <= 0) {
+            // on mac this breaks on Py_Main?
+            break;
+        }
+    }
+
+    if (top_most_frame == NULL) {
+        return depth;
+    }
+    // Whenever the trampoline is inserted, there might be a few Python
+    // stack levels that do not have the trampoline!
+    // They should not be consumed, because that lets native symbols flow 
forward.
+    return depth; //vmp_walk_and_record_python_stack_only(top_most_frame, 
result, max_depth, depth);
+#else
+    return vmp_walk_and_record_python_stack_only(frame, result, max_depth, 0, 
pc);
+#endif
+}
+
+int vmp_native_enabled(void) {
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+    return vmp_native_traces_enabled;
+#else
+    return 0;
+#endif
+}
+
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+int _ignore_symbols_from_path(const char * name) {
+    // which symbols should not be considered while walking
+    // the native stack?
+    if (strstr(name, "python") != NULL &&
+#ifdef __unix__
+        strstr(name, ".so\n") == NULL
+#elif defined(__APPLE__)
+        strstr(name, ".so") == NULL
+#endif
+       ) {
+        return 1;
+    }
+    return 0;
+}
+
+int _reset_vmp_ranges(void) {
+    // initially 10 (start, stop) entries!
+    int max_count = 10;
+    vmp_range_count = 0;
+    if (vmp_ranges != NULL) { free(vmp_ranges); }
+    vmp_ranges = malloc(max_count * sizeof(intptr_t));
+    return max_count;
+}
+
+
+int _resize_ranges(intptr_t ** cursor, int max_count) {
+    ptrdiff_t diff = (*cursor - vmp_ranges);
+    if (diff + 2 > max_count) {
+        max_count *= 2;
+        vmp_ranges = realloc(vmp_ranges, max_count*sizeof(intptr_t));
+        *cursor = vmp_ranges + diff;
+    }
+    return max_count;
+}
+
+intptr_t * _add_to_range(intptr_t * cursor, intptr_t start, intptr_t end) {
+    if (cursor[0] == start) {
+        // the last range is extended, this reduces the entry count
+        // which makes the querying faster
+        cursor[0] = end;
+    } else {
+        if (cursor != vmp_ranges) {
+            // not pointing to the first entry
+            cursor++;
+        }
+        cursor[0] = start;
+        cursor[1] = end;
+        vmp_range_count += 2;
+        cursor++;
+    }
+    return cursor;
+}
+
+#ifdef __unix__
+int vmp_read_vmaps(const char * fname) {
+
+    FILE * fd = fopen(fname, "rb");
+    if (fd == NULL) {
+        return 0;
+    }
+    char * saveptr;
+    char * line = NULL;
+    char * he = NULL;
+    char * name;
+    char *start_hex = NULL, *end_hex = NULL;
+    size_t n = 0;
+    ssize_t size;
+    intptr_t start, end;
+
+    // assumptions to be verified:
+    // 1) /proc/self/maps is ordered ascending by start address
+    // 2) libraries that contain the name 'python' are considered
+    //    candidates in the mapping to be ignored
+    // 3) libraries containing site-packages are not considered
+    //    candidates
+
+    int max_count = _reset_vmp_ranges();
+    intptr_t * cursor = vmp_ranges;
+    cursor[0] = -1;
+    while ((size = getline(&line, &n, fd)) >= 0) {
+        assert(line != NULL);
+        start_hex = strtok_r(line, "-", &saveptr);
+        if (start_hex == NULL) { continue; }
+        start = strtoll(start_hex, &he, 16);
+        end_hex = strtok_r(NULL, " ", &saveptr);
+        if (end_hex == NULL) { continue; }
+        end = strtoll(end_hex, &he, 16);
+        // skip over flags, ...
+        strtok_r(NULL, " ", &saveptr);
+        strtok_r(NULL, " ", &saveptr);
+        strtok_r(NULL, " ", &saveptr);
+        strtok_r(NULL, " ", &saveptr);
+
+        name = saveptr;
+        if (_ignore_symbols_from_path(name)) {
+            max_count = _resize_ranges(&cursor, max_count);
+            cursor = _add_to_range(cursor, start, end);
+        }
+        free(line);
+        line = NULL;
+        n = 0;
+    }
+
+    fclose(fd);
+    return 1;
+}
+#endif
+
+#ifdef __APPLE__
+int vmp_read_vmaps(const char * fname) {
+    kern_return_t kr;
+    task_t task;
+    mach_vm_address_t addr;
+    mach_vm_size_t vmsize;
+    vm_region_top_info_data_t topinfo;
+    mach_msg_type_number_t count;
+    memory_object_name_t obj;
+    int ret = 0;
+    pid_t pid;
+
+    pid = getpid();
+    kr = task_for_pid(mach_task_self(), pid, &task);
+    if (kr != KERN_SUCCESS) {
+        goto teardown;
+    }
+
+    addr = 0;
+    int max_count = _reset_vmp_ranges();
+    intptr_t * cursor = vmp_ranges;
+    cursor[0] = -1;
+
+    do {
+        // extract the top info using vm_region
+        count = VM_REGION_TOP_INFO_COUNT;
+        vmsize = 0;
+        kr = mach_vm_region(task, &addr, &vmsize, VM_REGION_TOP_INFO,
+                          (vm_region_info_t)&topinfo, &count, &obj);
+        if (kr == KERN_SUCCESS) {
+            vm_address_t start = addr, end = addr + vmsize;
+            // dladdr now gives the path of the shared object
+            Dl_info info;
+            if (dladdr((const void*)start, &info) == 0) {
+                // could not find image containing start
+                addr += vmsize;
+                continue;
+            }
+            if (_ignore_symbols_from_path(info.dli_fname)) {
+                // realloc if the chunk is too small
+                max_count = _resize_ranges(&cursor, max_count);
+                cursor = _add_to_range(cursor, start, end);
+            }
+            addr = addr + vmsize;
+        } else if (kr != KERN_INVALID_ADDRESS) {
+            goto teardown;
+        }
+    } while (kr == KERN_SUCCESS);
+
+    ret = 1;
+
+teardown:
+    if (task != MACH_PORT_NULL) {
+        mach_port_deallocate(mach_task_self(), task);
+    }
+    return ret;
+}
+#endif
+
+int vmp_native_enable(void) {
+    vmp_native_traces_enabled = 1;
+
+#if defined(__unix__)
+    return vmp_read_vmaps("/proc/self/maps");
+#elif defined(__APPLE__)
+    return vmp_read_vmaps(NULL);
+#endif
+}
+
+void vmp_native_disable(void) {
+    vmp_native_traces_enabled = 0;
+    if (vmp_ranges != NULL) {
+        free(vmp_ranges);
+        vmp_ranges = NULL;
+    }
+    vmp_range_count = 0;
+}
+
+int vmp_ignore_ip(intptr_t ip) {
+    int i = vmp_binary_search_ranges(ip, vmp_ranges, vmp_range_count);
+    if (i == -1) {
+        return 0;
+    }
+
+    assert((i & 1) == 0 && "returned index MUST be even");
+
+    intptr_t v = vmp_ranges[i];
+    intptr_t v2 = vmp_ranges[i+1];
+    return v <= ip && ip <= v2;
+}
+
+int vmp_binary_search_ranges(intptr_t ip, intptr_t * l, int count) {
+    intptr_t * r = l + count;
+    intptr_t * ol = l;
+    intptr_t * or = r-1;
+    while (1) {
+        ptrdiff_t i = (r-l)/2;
+        if (i == 0) {
+            if (l == ol && *l > ip) {
+                // at the start
+                return -1;
+            } else if (l == or && *l < ip) {
+                // at the end
+                return -1;
+            } else {
+                // we found the lower bound
+                i = l - ol;
+                if ((i & 1) == 1) {
+                    return i-1;
+                }
+                return i;
+            }
+        }
+        intptr_t * m = l + i;
+        if (ip < *m) {
+            r = m;
+        } else {
+            l = m;
+        }
+    }
+    return -1;
+}
+
+int vmp_ignore_symbol_count(void) {
+    return vmp_range_count;
+}
+
+intptr_t * vmp_ignore_symbols(void) {
+    return vmp_ranges;
+}
+
+void vmp_set_ignore_symbols(intptr_t * symbols, int count) {
+    vmp_ranges = symbols;
+    vmp_range_count = count;
+}
+#endif
diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.h 
b/rpython/rlib/rvmprof/src/shared/vmp_stack.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "vmprof.h"
+
+int vmp_walk_and_record_stack(PY_STACK_FRAME_T * frame, void **data,
+                              int max_depth, int native_skip, intptr_t pc);
+
+int vmp_native_enabled(void);
+int vmp_native_enable(void);
+int vmp_ignore_ip(intptr_t ip);
+int vmp_binary_search_ranges(intptr_t ip, intptr_t * l, int count);
+int vmp_native_symbols_read(void);
+void vmp_profile_lines(int lines);
+int vmp_profiles_python_lines(void);
+
+int vmp_ignore_symbol_count(void);
+intptr_t * vmp_ignore_symbols(void);
+void vmp_set_ignore_symbols(intptr_t * symbols, int count);
+void vmp_native_disable(void);
+
+#ifdef __unix__
+int vmp_read_vmaps(const char * fname);
+#endif
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof.h 
b/rpython/rlib/rvmprof/src/shared/vmprof.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof.h
@@ -43,6 +43,7 @@
 #define FRAME_CODE(f) f->
 PY_EVAL_RETURN_T * vmprof_eval(PY_STACK_FRAME_T *f, int throwflag);
 #else
+#define RPY_EXTERN
 // for cpython
 #include "_vmprof.h"
 #include <Python.h>
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h 
b/rpython/rlib/rvmprof/src/shared/vmprof_common.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h
@@ -141,6 +141,7 @@
 #  define _Py_atomic_load_relaxed(pp)  (*(pp))
 #endif
 
+#ifdef RPYTHON_VMPROF
 #ifndef RPYTHON_LL2CTYPES
 static PY_STACK_FRAME_T *get_vmprof_stack(void)
 {
@@ -158,7 +159,6 @@
 }
 #endif
 
-#ifdef RPYTHON_VMPROF
 RPY_EXTERN
 intptr_t vmprof_get_traceback(void *stack, void *ucontext,
                               intptr_t *result_p, intptr_t result_length)
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_main.h 
b/rpython/rlib/rvmprof/src/shared/vmprof_main.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_main.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_main.h
@@ -92,12 +92,13 @@
 int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, 
intptr_t pc)
 {
     PY_STACK_FRAME_T * frame;
+#ifdef RPYTHON_VMPROF
+    // do nothing here — NOTE(review): the next line reads the uninitialized
+    // local 'frame'; presumably 'frame = (PY_STACK_FRAME_T*)current' (or
+    // frame = get_vmprof_stack()) was intended — verify against callers
+    current = (PY_STACK_FRAME_T*)frame;
+#else
     if (!current) {
         return 0;
     }
-#ifdef RPYTHON_VMPROF
-    frame = get_vmprof_stack();
-#else
     frame = current->frame;
 #endif
     // skip over
@@ -135,7 +136,7 @@
 #ifdef RPYTHON_VMPROF
     depth = get_stack_trace(get_vmprof_stack(), st->stack, MAX_STACK_DEPTH-1, 
(intptr_t)GetPC(uc));
 #else
-    depth = get_stack_trace(tstate, st->stack, MAX_STACK_DEPTH-1, NULL);
+    depth = get_stack_trace(tstate, st->stack, MAX_STACK_DEPTH-1, 
(intptr_t)NULL);
 #endif
     if (depth == 0) {
         return 0;
@@ -264,14 +265,18 @@
 static void atfork_disable_timer(void) {
     if (profile_interval_usec > 0) {
         remove_sigprof_timer();
+#ifndef RPYTHON_VMPROF
         is_enabled = 0;
+#endif
     }
 }
 
 static void atfork_enable_timer(void) {
     if (profile_interval_usec > 0) {
         install_sigprof_timer();
+#ifndef RPYTHON_VMPROF
         is_enabled = 1;
+#endif
     }
 }
 
@@ -302,9 +307,56 @@
     return 0;
 }
 
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+void init_cpyprof(int native)
+{
+    // skip this if native should not be enabled
+    if (!native) {
+        vmp_native_disable();
+        return;
+    }
+#if CPYTHON_HAS_FRAME_EVALUATION
+    PyThreadState *tstate = PyThreadState_GET();
+    tstate->interp->eval_frame = vmprof_eval;
+    _default_eval_loop = _PyEval_EvalFrameDefault;
+#elif defined(RPYTHON_VMPROF)
+    // TODO nothing?
+#else
+    if (vmp_patch_callee_trampoline(PyEval_EvalFrameEx,
+                vmprof_eval, (void*)&_default_eval_loop) == 0) {
+    } else {
+        fprintf(stderr, "FATAL: could not insert trampline, try with 
--no-native\n");
+        // TODO dump the first few bytes and tell them to create an issue!
+        exit(-1);
+    }
+#endif
+    vmp_native_enable();
+}
+
+static void disable_cpyprof(void)
+{
+    vmp_native_disable();
+#if CPYTHON_HAS_FRAME_EVALUATION
+    PyThreadState *tstate = PyThreadState_GET();
+    tstate->interp->eval_frame = _PyEval_EvalFrameDefault;
+#elif defined(RPYTHON_VMPROF)
+    // TODO nothing?
+#else
+    if (vmp_unpatch_callee_trampoline(PyEval_EvalFrameEx) > 0) {
+        fprintf(stderr, "FATAL: could not remove trampoline\n");
+        exit(-1);
+    }
+#endif
+    dump_native_symbols(vmp_profile_fileno());
+}
+#endif
+
 RPY_EXTERN
-int vmprof_enable(int memory)
+int vmprof_enable(int memory, int native)
 {
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+    init_cpyprof(native);
+#endif
     assert(vmp_profile_fileno() >= 0);
     assert(prepare_interval_usec > 0);
     profile_interval_usec = prepare_interval_usec;
@@ -326,7 +378,7 @@
 }
 
 
-static int close_profile(void)
+int close_profile(void)
 {
     (void)vmp_write_time_now(MARKER_TRAILER);
 
@@ -341,6 +393,9 @@
 {
     vmprof_ignore_signals(1);
     profile_interval_usec = 0;
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+    disable_cpyprof();
+#endif
 
     if (remove_sigprof_timer() == -1)
         return -1;
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to