Hi,

Here is a diff for dynamic tracing of kernel's functions boundaries.
It's implemented as one of the dt's provider on i386 and amd64.
To activate it, DDBPROF and pseudo device dt must be activated
on GENERIC.

For now it's working like DTRACE fbt(function boundaries tracing).

We replace the prologue and epilogue of each function with a breakpoint.
The replaced instruction on amd64 and 386 are respectively
"push %rbp", "push %ebp" for prologue and "ret", "pop %ebp" for epilogue.
These instructions are emulated at the end of the breakpoint handler,
just after sending an event to userland (to be read by btrace).

For now the lookup to find the instruction to replace is hardcoded
and needs to find if there is a retguard before the prologue of the
function or if there are multiples int3 after the last ret.
It allows to find 31896 entry points and 19513 return points on amd64.

ddb has also  a similar way of tracing all prologue of function. It now
uses this new dt provider for doing tracing.

Perf wise, when all entry probes are enabled, the slow down is
quite massive but expected since breakpoints are slow.
On the kernel compilation with 10 threads on a linux qemu VM we go from:
real   2m38.280s
user   10m2.050s
sys    14m10.360s

to:
real   24m19.280s
user   9m44.110s
sys    220m26.980s

Any comments on the diff?

Tom

diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index dd2dfde3e3b..3a0a58ba3ac 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -188,10 +188,11 @@ INTRENTRY_LABEL(trap03):
     sti
     cld
     SMAP_CLAC
-    movq    %rsp, %rdi
-    call    _C_LABEL(db_prof_hook)
-    cmpl    $1, %eax
-    jne    .Lreal_kern_trap
+    leaq    _C_LABEL(dt_prov_kprobe), %rdi
+    movq    %rsp, %rsi
+    call    _C_LABEL(dt_prov_kprobe_hook)
+    cmpl    $0, %eax
+    je     .Lreal_kern_trap

     cli
     movq    TF_RDI(%rsp),%rdi
@@ -210,6 +211,11 @@ INTRENTRY_LABEL(trap03):
     movq    TF_R11(%rsp),%r11
     /* %rax restored below, after being used to shift the stack */

+    cmpl    $2, %eax
+    je      .Lemulate_ret
+
+.Lemulate_push_rbp:
+
     /*
      * We are returning from a probe trap so we need to fix the
      * stack layout and emulate the patched instruction.
@@ -217,6 +223,9 @@ INTRENTRY_LABEL(trap03):
      */
     subq    $16, %rsp

+    movq    (TF_RAX + 16)(%rsp), %rax
+    movq    %rax, TF_RAX(%rsp)
+
     /* Shift hardware-saved registers. */
     movq    (TF_RIP + 16)(%rsp), %rax
     movq    %rax, TF_RIP(%rsp)
@@ -237,7 +246,20 @@ INTRENTRY_LABEL(trap03):

     /* Finally restore %rax */
     movq    (TF_RAX + 16)(%rsp),%rax
+    jmp .ret_int3
+
+.Lemulate_ret:
+
+    /* Store a new return address in %rip */
+    movq    TF_RSP(%rsp), %rax
+    movq    (%rax), %rax
+    movq    %rax, TF_RIP(%rsp)
+    addq    $8, TF_RSP(%rsp)
+
+    /* Finally restore %rax */
+    movq    (TF_RAX)(%rsp),%rax

+.ret_int3:
     addq    $TF_RIP,%rsp
     iretq
 #endif /* !defined(GPROF) && defined(DDBPROF) */
diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s
index 0c5607fe38a..e2d43b47eb3 100644
--- a/sys/arch/i386/i386/locore.s
+++ b/sys/arch/i386/i386/locore.s
@@ -205,7 +205,8 @@ INTRENTRY_LABEL(label):    /* from kernel */    ; \
 #define    INTRFASTEXIT \
     jmp    intr_fast_exit

-#define    INTR_FAKE_TRAP    0xbadabada
+#define    INTR_FAKE_TRAP_PUSH_RPB    0xbadabada
+#define    INTR_FAKE_TRAP_POP_RBP    0xbcbcbcbc

 /*
  * PTmap is recursive pagemap at top of virtual address space.
@@ -1259,17 +1260,32 @@ calltrap:
     jne    .Lreal_trap

     pushl    %esp
-    call    _C_LABEL(db_prof_hook)
-    addl    $4,%esp
-    cmpl    $1,%eax
-    jne    .Lreal_trap
+    subl    $4, %esp
+    pushl   %eax
+    leal    _C_LABEL(dt_prov_kprobe), %eax
+    movl    %eax, 4(%esp)
+    popl    %eax
+    call    _C_LABEL(dt_prov_kprobe_hook)
+    addl    $8,%esp
+    cmpl    $0,%eax
+    je    .Lreal_trap

     /*
      * Abuse the error field to indicate that INTRFASTEXIT needs
      * to emulate the patched instruction.
      */
-    movl    $INTR_FAKE_TRAP, TF_ERR(%esp)
-    jz    .Lalltraps_check_asts
+    cmpl $1, %eax
+    je .Lset_emulate_push_rbp
+
+    cmpl $2, %eax
+    je .Lset_emulate_ret
+
+.Lset_emulate_push_rbp:
+    movl    $INTR_FAKE_TRAP_PUSH_RPB, TF_ERR(%esp)
+    jmp    .Lalltraps_check_asts
+.Lset_emulate_ret:
+    movl    $INTR_FAKE_TRAP_POP_RBP, TF_ERR(%esp)
+    jmp    .Lalltraps_check_asts
 .Lreal_trap:
 #endif /* !defined(GPROF) && defined(DDBPROF) */
     pushl    %esp
@@ -1298,8 +1314,10 @@ calltrap:
      * The code below does that by trashing %eax, so it MUST be
      * restored afterward.
      */
-    cmpl    $INTR_FAKE_TRAP, TF_ERR(%esp)
-    je    .Lprobe_fixup
+    cmpl    $INTR_FAKE_TRAP_PUSH_RPB, TF_ERR(%esp)
+    je    .Lprobe_fixup_push_rbp
+    cmpl    $INTR_FAKE_TRAP_POP_RBP, TF_ERR(%esp)
+    je    .Lprobe_fixup_pop_rbp
 #endif /* !defined(GPROF) && defined(DDBPROF) */
 #ifndef DIAGNOSTIC
     INTRFASTEXIT
@@ -1327,7 +1345,7 @@ spl_lowered:

     .text
 #if !defined(GPROF) && defined(DDBPROF)
-.Lprobe_fixup:
+.Lprobe_fixup_push_rbp:
     /* Restore all register unwinding the stack. */
     INTR_RESTORE_ALL

@@ -1352,6 +1370,26 @@ spl_lowered:

     popl    %eax
     iret
+.Lprobe_fixup_pop_rbp:
+    /* Restore all register unwinding the stack. */
+    INTR_RESTORE_ALL
+
+    movl    %eax,0(%esp)
+
+    /* pop %ebp */
+    movl    20(%esp), %ebp
+    /* Shift hardware-saved registers: eflags, cs, eip */
+    movl    16(%esp), %eax
+    movl    %eax, 20(%esp)
+    movl    12(%esp), %eax
+    movl    %eax, 16(%esp)
+    movl    8(%esp), %eax
+    movl    %eax, 12(%esp)
+
+    /* Pop eax and restore the stack pointer */
+    popl    %eax
+    addl    $8, %esp
+    iret
 #endif /* !defined(GPROF) && defined(DDBPROF) */

     .text
diff --git a/sys/conf/GENERIC b/sys/conf/GENERIC
index 315238ee034..7f708169673 100644
--- a/sys/conf/GENERIC
+++ b/sys/conf/GENERIC
@@ -4,7 +4,7 @@
 #    GENERIC kernel

 option        DDB        # in-kernel debugger
-#option        DDBPROF        # ddb(4) based profiling
+option        DDBPROF        # ddb(4) based profiling
 #option        DDB_SAFE_CONSOLE # allow break into ddb during boot
 #makeoptions    DEBUG=""    # do not compile full symbol table
 #makeoptions    PROF="-pg"    # build profiled kernel
@@ -82,7 +82,7 @@ pseudo-device    msts    1    # MSTS line discipline
 pseudo-device    endrun    1    # EndRun line discipline
 pseudo-device    vnd    4    # vnode disk devices
 pseudo-device    ksyms    1    # kernel symbols device
-#pseudo-device    dt        # Dynamic Tracer
+pseudo-device    dt    1   # Dynamic Tracer

 # clonable devices
 pseudo-device    bpfilter    # packet filter
diff --git a/sys/conf/files b/sys/conf/files
index f199715a0d4..f3b69360178 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -622,6 +622,7 @@ file    dev/dt/dt_dev.c            dt needs-flag
 file    dev/dt/dt_prov_profile.c    dt
 file    dev/dt/dt_prov_syscall.c    dt
 file    dev/dt/dt_prov_static.c        dt
+file    dev/dt/dt_prov_kprobe.c        dt

 # XXX machine-independent SCSI files should live somewhere here, maybe

diff --git a/sys/ddb/db_prof.c b/sys/ddb/db_prof.c
index 7d8190f41bc..8b3e52b8c9c 100644
--- a/sys/ddb/db_prof.c
+++ b/sys/ddb/db_prof.c
@@ -43,7 +43,6 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
@@ -57,170 +56,56 @@
 #include <ddb/db_access.h> /* for db_write_bytes() */
 #include <ddb/db_sym.h>

-extern char etext[];
-
-struct prof_probe {
-    const char        *pp_name;
-    Elf_Sym            *pp_symb;
-    SLIST_ENTRY(prof_probe)     pp_next;
-    vaddr_t             pp_inst;
-    int             pp_on;
-};
+#include "dt.h" /* for NDT */

-#define PPTSIZE        PAGE_SIZE
-#define    PPTMASK        ((PPTSIZE / sizeof(struct prof_probe)) - 1)
-#define INSTTOIDX(inst)    ((((unsigned long)(inst)) >> 4) & PPTMASK)
-SLIST_HEAD(, prof_probe) *pp_table;
+#if NDT > 0
+#include <dev/dt/dtvar.h>
+#endif

 extern int db_profile;            /* Allow dynamic profiling */
 int db_prof_on;                /* Profiling state On/Off */

-vaddr_t db_get_pc(struct trapframe *);
-vaddr_t db_get_probe_addr(struct trapframe *);
-
-void db_prof_forall(Elf_Sym *, char *, char *, int, void *);
-void db_prof_count(unsigned long, unsigned long);
-
-void
-db_prof_init(void)
-{
-    unsigned long nentries;
-
-    pp_table = malloc(PPTSIZE, M_TEMP, M_NOWAIT|M_ZERO);
-    if (pp_table == NULL)
-        return;
-
-    db_elf_sym_forall(db_prof_forall, &nentries);
-    printf("ddb probe table references %lu entry points\n", nentries);
-}
-
-void
-db_prof_forall(Elf_Sym *sym, char *name, char *suff, int pre, void *xarg)
-{
-    Elf_Sym *symb = sym;
-    unsigned long *nentries = xarg;
-    struct prof_probe *pp;
-    vaddr_t inst;
-
-    if (ELF_ST_TYPE(symb->st_info) != STT_FUNC)
-        return;
-
-    inst = symb->st_value;
-    if (inst < KERNBASE || inst >= (vaddr_t)&etext)
-        return;
-
-    if (*((uint8_t *)inst) != SSF_INST)
-        return;
-
-    if (strncmp(name, "db_", 3) == 0 || strncmp(name, "trap", 4) == 0)
-        return;
-
-#ifdef __i386__
-    /* Avoid a recursion in db_write_text(). */
-    if (strncmp(name, "pmap_pte", 8) == 0)
-        return;
-#endif
-
-    pp = malloc(sizeof(struct prof_probe), M_TEMP, M_NOWAIT|M_ZERO);
-    if (pp == NULL)
-        return;
+void dt_prov_kprobe_patch_all_entry(void);
+void dt_prov_kprobe_depatch_all_entry(void);

-    pp->pp_name = name;
-    pp->pp_inst = inst;
-    pp->pp_symb = symb;
-
-    SLIST_INSERT_HEAD(&pp_table[INSTTOIDX(pp->pp_inst)], pp, pp_next);
-
-    (*nentries)++;
-}
+vaddr_t db_get_probe_addr(struct trapframe *);
+vaddr_t db_get_pc(struct trapframe *);

 int
 db_prof_enable(void)
 {
-#if defined(__amd64__) || defined(__i386__)
-    struct prof_probe *pp;
-    uint8_t patch = BKPT_INST;
-    unsigned long s;
-    int i;
-
-    if (!db_profile)
-        return EPERM;
-
-    if (pp_table == NULL)
-        return ENOENT;
+#if NDT > 0
+       if (!db_profile)
+               return EPERM;

-    KASSERT(BKPT_SIZE == SSF_SIZE);
-
-    s = intr_disable();
-    for (i = 0; i < (PPTSIZE / sizeof(*pp)); i++) {
-        SLIST_FOREACH(pp, &pp_table[i], pp_next) {
-            pp->pp_on = 1;
-            db_write_bytes(pp->pp_inst, BKPT_SIZE, &patch);
-        }
-    }
-    intr_restore(s);
-
-    db_prof_on = 1;
-
-    return 0;
+        dt_prov_kprobe_patch_all_entry();
+        db_prof_on = 1;
+        return 0;
 #else
     return ENOENT;
-#endif
+#endif /* NDT > 0 */
 }

 void
 db_prof_disable(void)
 {
-    struct prof_probe *pp;
-    uint8_t patch = SSF_INST;
-    unsigned long s;
-    int i;
-
-    db_prof_on = 0;
-
-    s = intr_disable();
-    for (i = 0; i < (PPTSIZE / sizeof(*pp)); i++) {
-        SLIST_FOREACH(pp, &pp_table[i], pp_next) {
-            db_write_bytes(pp->pp_inst, SSF_SIZE, &patch);
-            pp->pp_on = 0;
-        }
-    }
-    intr_restore(s);
-}
-
-int
-db_prof_hook(struct trapframe *frame)
-{
-    struct prof_probe *pp;
-    vaddr_t pc, inst;
-
-    if (pp_table == NULL)
-        return 0;
-
-    pc = db_get_pc(frame);
-    inst = db_get_probe_addr(frame);
-
-    SLIST_FOREACH(pp, &pp_table[INSTTOIDX(inst)], pp_next) {
-        if (pp->pp_on && pp->pp_inst == inst) {
-            if (db_prof_on)
-                db_prof_count(pc, inst);
-            return 1;
-        }
-    }
-
-    return 0;
+#if NDT > 0
+    db_prof_on = 0;
+    dt_prov_kprobe_depatch_all_entry();
+#endif /* NDT > 0 */
 }

 /*
  * Equivalent to mcount(), must be called with interrupt disabled.
  */
 void
-db_prof_count(unsigned long frompc, unsigned long selfpc)
+db_prof_count(struct trapframe *frame)
 {
     unsigned short *frompcindex;
     struct tostruct *top, *prevtop;
     struct gmonparam *p;
     long toindex;
+    unsigned long frompc, selfpc;

     if ((p = curcpu()->ci_gmon) == NULL)
         return;
@@ -232,6 +117,8 @@ db_prof_count(unsigned long frompc, unsigned long selfpc)
     if (p->state != GMON_PROF_ON)
         return;

+    frompc = db_get_pc(frame);
+    selfpc = db_get_probe_addr(frame);
     /*
      * check that frompcindex is a reasonable pc value.
      * for example:    signal catchers get called from the stack,
@@ -241,6 +128,7 @@ db_prof_count(unsigned long frompc, unsigned long selfpc)
     if (frompc > p->textsize)
         return;

+
 #if (HASHFRACTION & (HASHFRACTION - 1)) == 0
     if (p->hashfraction == HASHFRACTION)
         frompcindex =
diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c
index 51dc5e1cd81..64889ac8225 100644
--- a/sys/dev/dt/dt_dev.c
+++ b/sys/dev/dt/dt_dev.c
@@ -122,7 +122,7 @@ int    dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
 int    dt_ioctl_record_start(struct dt_softc *);
 void    dt_ioctl_record_stop(struct dt_softc *);
 int    dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
-void    dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
+int    dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);

 int    dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);

@@ -136,6 +136,7 @@ dtattach(struct device *parent, struct device *self, void 
*aux)
     dt_nprobes += dt_prov_profile_init();
     dt_nprobes += dt_prov_syscall_init();
     dt_nprobes += dt_prov_static_init();
+    dt_nprobes += dt_prov_kprobe_init();

     printf("dt: %u probes\n", dt_nprobes);
 }
@@ -276,6 +277,7 @@ dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
         return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
     case DTIOCRECORD:
     case DTIOCPRBENABLE:
+    case DTIOCPRBDISABLE:
         /* root only ioctl(2) */
         break;
     default:
@@ -296,6 +298,9 @@ dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
     case DTIOCPRBENABLE:
         error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
         break;
+    case DTIOCPRBDISABLE:
+            error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr);
+            break;
     default:
         KASSERT(0);
     }
@@ -479,6 +484,36 @@ dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
     return 0;
 }

+int
+dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq)
+{
+    struct dt_probe *dtp;
+    int error;
+
+    KASSERT(suser(curproc) == 0);
+    if (!dtioc_req_isvalid(dtrq))
+        return EINVAL;
+
+    SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
+        if (dtp->dtp_pbn == dtrq->dtrq_pbn)
+            break;
+    }
+    if (dtp == NULL)
+        return ENOENT;
+
+    if (dtp->dtp_prov->dtpv_desalloc)
+    {
+        error = dtp->dtp_prov->dtpv_desalloc(dtp, sc, dtrq);
+        if (error)
+            return error;
+    }
+
+    DPRINTF("dt%d: pid %d desalloc\n", sc->ds_unit, sc->ds_pid,
+            dtrq->dtrq_pbn);
+
+    return 0;
+}
+
 struct dt_probe *
 dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider 
*dtpv)
 {
@@ -493,6 +528,9 @@ dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
     dtp->dtp_func = func;
     dtp->dtp_name = name;
     dtp->dtp_sysnum = -1;
+    dtp->dtp_ref = 0;
+
+    mtx_init(&dtp->dtp_mtx, IPL_HIGH);

     return dtp;
 }
diff --git a/sys/dev/dt/dt_prov_kprobe.c b/sys/dev/dt/dt_prov_kprobe.c
new file mode 100644
index 00000000000..bd7c06e5680
--- /dev/null
+++ b/sys/dev/dt/dt_prov_kprobe.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 2020 Tom Rollet <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/atomic.h>
+#include <sys/exec_elf.h>
+
+#include <ddb/db_elf.h>
+#include <machine/db_machdep.h>
+#include <ddb/db_extern.h>
+#include <ddb/db_access.h>
+#include <ddb/db_sym.h>
+#include <ddb/db_interface.h>
+
+#include <dev/dt/dtvar.h>
+
+int
+dt_prov_kprobe_alloc(struct dt_probe *dtp, struct dt_softc *sc,
+        struct dt_pcb_list *plist, struct dtioc_req *dtrq);
+int dt_prov_kprobe_hook(struct dt_provider *dtpv, ...);
+int
+dt_prov_kprobe_desalloc(struct dt_probe *dtp, struct dt_softc *sc,
+        struct dtioc_req *dtrq);
+
+void db_prof_count(struct trapframe *frame);
+
+
+struct kprobe_probe
+{
+    struct dt_probe* dtp;
+    SLIST_ENTRY(kprobe_probe) kprobe_next;
+};
+
+uint32_t hash_tmp( uint32_t a)
+{
+    a = (a+0x7ed55d16) + (a<<12);
+    a = (a^0xc761c23c) ^ (a>>19);
+    a = (a+0x165667b1) + (a<<5);
+    a = (a+0xd3a2646c) ^ (a<<9);
+    a = (a+0xfd7046c5) + (a<<3);
+    a = (a^0xb55a4f09) ^ (a>>16);
+    return a;
+}
+
+#define PPTSIZE        PAGE_SIZE * 30
+#define    PPTMASK        ((PPTSIZE / sizeof(struct kprobe_probe)) - 1)
+#define INSTTOIDX(inst)    ( hash_tmp(inst) & PPTMASK)
+
+SLIST_HEAD(, kprobe_probe) *dtpf_entry;
+SLIST_HEAD(, kprobe_probe) *dtpf_return;
+int nb_probes_entry = 0;
+int nb_probes_return = 0;
+
+
+#define DTEVT_PROV_KPROBE (DTEVT_COMMON|DTEVT_FUNCARGS)
+
+#define KPROBE_ENTRY "entry"
+#define KPROBE_RETURN "return"
+
+#if defined(__amd64__)
+#define KPROBE_RETGUARD_MOV_1 0x4c
+#define KPROBE_RETGUARD_MOV_2 0x8b
+#define KPROBE_RETGUARD_MOV_3 0x1d
+
+#define KPROBE_RETGUARD_MOV_SIZE 7
+
+#define KPROBE_RETGUARD_XOR_1 0x4c
+#define KPROBE_RETGUARD_XOR_2 0x33
+#define KPROBE_RETGUARD_XOR_3 0x1c
+
+#define KPROBE_RETGUARD_XOR_SIZE 4
+
+#define RET 0xc3
+#define RET_SIZE 1
+
+#elif defined(__i386__)
+#define POP_RBP 0x5d
+#define POP_RBP_SIZE 1
+#endif
+
+struct dt_provider dt_prov_kprobe = {
+    .dtpv_name    = "kprobe",
+    .dtpv_alloc   = dt_prov_kprobe_alloc,
+    .dtpv_enter   = dt_prov_kprobe_hook,
+    .dtpv_leave   = NULL,
+    .dtpv_desalloc = dt_prov_kprobe_desalloc,
+};
+
+extern db_symtab_t db_symtab;
+extern char __kutext_end[];
+extern int db_prof_on;
+
+/* Initialize all entry and return probes and store them in global arrays */
+    int
+dt_prov_kprobe_init(void)
+{
+#if defined(__amd64__) || defined(__i386__)
+    struct dt_probe *dtp;
+    struct kprobe_probe *kprobe_dtp;
+    Elf_Sym *symp, *symtab_start, *symtab_end;
+    char *strtab, *name;
+    vaddr_t inst, limit;
+    int nb_sym, nb_probes;
+
+
+    nb_sym = (db_symtab.end - db_symtab.start) / sizeof (Elf_Sym);
+    nb_probes = nb_probes_entry = nb_probes_return = 0;
+
+    dtpf_entry = malloc(PPTSIZE, M_DT, M_NOWAIT|M_ZERO);
+    if (dtpf_entry == NULL)
+        goto end;
+
+    dtpf_return = malloc(PPTSIZE, M_DT, M_NOWAIT|M_ZERO);
+    if (dtpf_return == NULL)
+        goto end;
+
+    db_symtab_t *stab = &db_symtab;
+
+    symtab_start = STAB_TO_SYMSTART(stab);
+    symtab_end = STAB_TO_SYMEND(stab);
+
+    strtab = db_elf_find_strtab(stab);
+
+    for (symp = symtab_start; symp < symtab_end; symp++) {
+        if (ELF_ST_TYPE(symp->st_info) != STT_FUNC)
+            continue;
+
+        inst = symp->st_value;
+        name = strtab + symp->st_name;
+        limit = symp->st_value + symp->st_size;
+
+        /* Filter function that are not mapped in memory */
+        if (inst < KERNBASE || inst >= (vaddr_t)&__kutext_end)
+            continue;
+
+        /* Remove some function to avoid recursive tracing */
+        if (strncmp(name, "dt_", 3) == 0 || strncmp(name, "trap", 4) == 0 ||
+                strncmp(name, "db_", 3) == 0)
+            continue;
+
+#if defined(__amd64__)
+        /* Find if there is a retguard, if so move the inst pointer to the later 'push rbp' */
+        if (*((uint8_t *)inst) != SSF_INST) {
+            /* No retguards in i386 */
+            if (((uint8_t *)inst)[0] != KPROBE_RETGUARD_MOV_1 ||
+                ((uint8_t *)inst)[1] != KPROBE_RETGUARD_MOV_2 ||
+                ((uint8_t *)inst)[2] != KPROBE_RETGUARD_MOV_3 ||
+                ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE] != KPROBE_RETGUARD_XOR_1 || +                ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + 1] != KPROBE_RETGUARD_XOR_2 || +                ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + 2] != KPROBE_RETGUARD_XOR_3 || +                ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + KPROBE_RETGUARD_XOR_SIZE] != SSF_INST)
+                continue;
+            inst = (vaddr_t)&(((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + KPROBE_RETGUARD_XOR_SIZE]);
+        }
+#elif defined(__i386__)
+        if (*((uint8_t *)inst) != SSF_INST)
+            continue;
+#endif
+
+        dtp = dt_dev_alloc_probe(name, KPROBE_ENTRY, &dt_prov_kprobe);
+        if (dtp == NULL)
+            goto end;
+
+        kprobe_dtp = malloc(sizeof(struct kprobe_probe), M_TEMP, 
M_NOWAIT|M_ZERO);
+        if (kprobe_dtp == NULL)
+            goto end;
+        kprobe_dtp->dtp = dtp;
+
+        dtp->dtp_addr = inst;
+        dtp->dtp_nargs = db_ctf_func_numargs(symp);
+        dt_dev_register_probe(dtp);
+
+ SLIST_INSERT_HEAD(&dtpf_entry[INSTTOIDX(dtp->dtp_addr)], kprobe_dtp, 
kprobe_next);
+
+        nb_probes++;
+        nb_probes_entry++;
+
+
+        /*
+         *  Poor method to find the return point
+         *  => we would need a disassembler to find all return points
+         *  For now we start from the end of the function, iterate on
+         *  int3 inserted for retguard until we find a ret
+         */
+#if defined(__amd64__)
+        if (*(uint8_t *)(limit - 1) != RET)
+            continue;
+        inst = limit - 1;
+#elif defined(__i386__)
+        /*
+         * Little temporary hack to find some return probe
+         *   => always int3 after 'pop %rpb; ret'
+         */
+        while(*((uint8_t *)inst) == 0xcc)
+            (*(uint8_t *)inst) -= 1;
+        if (*(uint8_t *)(limit - 2) != POP_RBP)
+            continue;
+        inst = limit - 2;
+#endif
+
+        dtp = dt_dev_alloc_probe(name, KPROBE_RETURN, &dt_prov_kprobe);
+        if (dtp == NULL)
+            goto end;
+
+        kprobe_dtp = malloc(sizeof(struct kprobe_probe), M_TEMP, 
M_NOWAIT|M_ZERO);
+        if (kprobe_dtp == NULL)
+            goto end;
+        kprobe_dtp->dtp = dtp;
+
+        dtp->dtp_addr = inst;
+        dt_dev_register_probe(dtp);
+ SLIST_INSERT_HEAD(&dtpf_return[INSTTOIDX(dtp->dtp_addr)], kprobe_dtp, kprobe_next);
+        nb_probes++;
+        nb_probes_return++;
+    }
+#endif
+end:
+    printf("dt kprobe: entry: %d return: %d\n", nb_probes_entry, 
nb_probes_return);
+    return nb_probes;
+}
+
+    int
+dt_prov_kprobe_alloc(struct dt_probe *dtp, struct dt_softc *sc,
+        struct dt_pcb_list *plist, struct dtioc_req *dtrq)
+{
+#if defined(__amd64__) || defined(__i386__)
+    uint8_t patch = BKPT_INST;
+    struct dt_pcb *dp;
+    unsigned s;
+
+    dp = dt_pcb_alloc(dtp, sc);
+    if (dp == NULL)
+        return ENOMEM;
+
+    /* Patch only if it's first pcb referencing this probe */
+    mtx_enter(&dtp->dtp_mtx);
+    dtp->dtp_ref++;
+    assert(dtp->dtp_ref != 0);
+
+    if (dtp->dtp_ref == 1) {
+        s = intr_disable();
+        db_write_bytes(dtp->dtp_addr, BKPT_SIZE, &patch);
+        intr_restore(s);
+    }
+    mtx_leave(&dtp->dtp_mtx);
+
+    dp->dp_filter = dtrq->dtrq_filter;
+    dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_KPROBE;
+    TAILQ_INSERT_HEAD(plist, dp, dp_snext);
+    return 0;
+
+#else
+    return ENOENT;
+#endif
+}
+
+    int
+dt_prov_kprobe_desalloc(struct dt_probe *dtp, struct dt_softc *sc,
+        struct dtioc_req *dtrq)
+{
+#if defined(__amd64__) || defined(__i386__)
+    uint8_t patch;
+    int size;
+    unsigned s;
+
+    if (strcmp(dtp->dtp_name, KPROBE_ENTRY) == 0)
+    {
+        patch = SSF_INST;
+        size  = SSF_SIZE;
+    }
+    else if (strcmp(dtp->dtp_name, KPROBE_RETURN) == 0)
+    {
+#if defined(__amd64__)
+        patch = RET;
+        size  = RET_SIZE;
+#elif defined(__i386__)
+        patch = POP_RBP;
+        size  = POP_RBP_SIZE;
+#endif
+    }
+    else
+        assert(0 && "Trying to desalloc not yet implemented probe type");
+
+    mtx_enter(&dtp->dtp_mtx);
+    dtp->dtp_ref--;
+
+    if (dtp->dtp_ref == 0) {
+        s = intr_disable();
+        db_write_bytes(dtp->dtp_addr, size, &patch);
+        intr_restore(s);
+    }
+
+    mtx_leave(&dtp->dtp_mtx);
+
+    /* Desallocation of pcb is done by dt_pcb_purge while closing the dev */
+    return 0;
+
+#else
+    return ENOENT;
+#endif
+}
+
+    int
+dt_prov_kprobe_hook(struct dt_provider *dtpv, ...)
+{
+#if defined(__amd64__) || defined(__i386__)
+    struct dt_probe *dtp;
+    struct dt_pcb *dp;
+    va_list ap;
+    struct trapframe *tf;
+    int is_dt_bkpt = 0;
+    struct kprobe_probe *kprobe_dtp;
+    /* Arguments for entry probes */
+    vaddr_t *args;
+    size_t argsize;
+    /* Return values for return probes*/
+    int error;
+    register_t retval[2];
+
+    KASSERT(dtpv == &dt_prov_kprobe);
+
+    va_start(ap, dtpv);
+    tf = va_arg(ap, struct trapframe*);
+    va_end(ap);
+
+#if defined(__amd64__)
+    vaddr_t addr = tf->tf_rip - BKPT_SIZE;
+#elif defined(__i386)
+    vaddr_t addr = tf->tf_eip - BKPT_SIZE;
+#endif
+
+
+    SLIST_FOREACH(kprobe_dtp, &dtpf_entry[INSTTOIDX(addr)], kprobe_next) {
+        dtp = kprobe_dtp->dtp;
+
+        if (dtp->dtp_addr != addr)
+            continue;
+
+        is_dt_bkpt = 1;
+        if (db_prof_on)
+            db_prof_count(tf);
+
+        if (!dtp->dtp_recording)
+            continue;
+
+        smr_read_enter();
+        SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
+            struct dt_evt *dtev;
+
+            dtev = dt_pcb_ring_get(dp, 0);
+            if (dtev == NULL)
+                continue;
+
+#if defined(__amd64__)
+            args = (vaddr_t *)tf->tf_rdi;
+            argsize = 6; /* Only 6 first args are passed on registers  */
+#elif defined(__i386)
+            /* All args on stack */
+            args = (vaddr_t *)(tf->tf_esp + 4);
+            argsize = 10;
+#endif
+
+            if (ISSET(dp->dp_evtflags, DTEVT_FUNCARGS))
+                memcpy(dtev->dtev_args, args, argsize);
+
+            dt_pcb_ring_consume(dp, dtev);
+        }
+        smr_read_leave();
+    }
+
+    if (is_dt_bkpt)
+        return is_dt_bkpt;
+
+    SLIST_FOREACH(kprobe_dtp, &dtpf_return[INSTTOIDX(addr)], kprobe_next) {
+        dtp = kprobe_dtp->dtp;
+
+        if (dtp->dtp_addr != addr)
+            continue;
+
+        is_dt_bkpt = 2;
+
+        if (!dtp->dtp_recording)
+            continue;
+
+        smr_read_enter();
+        SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
+            struct dt_evt *dtev;
+
+            dtev = dt_pcb_ring_get(dp, 0);
+            if (dtev == NULL)
+                continue;
+
+#if defined(__amd64__)
+            retval[0] = tf->tf_rax;
+            retval[1] = 0;
+            error = 0;
+#elif defined(__i386)
+            retval[0] = tf->tf_eax;
+            retval[1] = 0;
+            error = 0;
+#endif
+
+            dtev->dtev_retval[0] = retval[0];
+            dtev->dtev_retval[1] = retval[1];
+            dtev->dtev_error = error;
+
+            dt_pcb_ring_consume(dp, dtev);
+        }
+        smr_read_leave();
+    }
+    return is_dt_bkpt;
+#else
+    return 0;
+#endif
+}
+
+/* Function called by ddb to patch all functions without allocating 1 pcb per probe */
+void dt_prov_kprobe_patch_all_entry(void)
+{
+#if defined(__amd64__) || defined(__i386__)
+    uint8_t patch = BKPT_INST;
+    struct dt_probe *dtp;
+    struct kprobe_probe *kprobe_dtp;
+    size_t i;
+
+
+
+    for (i = 0; i < PPTMASK; ++i)
+    {
+        SLIST_FOREACH(kprobe_dtp, &dtpf_entry[i], kprobe_next) {
+            dtp = kprobe_dtp->dtp;
+
+            mtx_enter(&dtp->dtp_mtx);
+            dtp->dtp_ref++;
+
+            if (dtp->dtp_ref == 1) {
+                unsigned s;
+                s = intr_disable();
+                db_write_bytes(dtp->dtp_addr, BKPT_SIZE, &patch);
+                intr_restore(s);
+            }
+
+            mtx_leave(&dtp->dtp_mtx);
+        }
+    }
+#endif
+}
+
+/* Function called by ddb to patch all functions without allocating 1 pcb per probe */
+void dt_prov_kprobe_depatch_all_entry(void)
+{
+#if defined(__amd64__) || defined(__i386__)
+    uint8_t patch = SSF_INST;
+    struct dt_probe *dtp;
+    struct kprobe_probe *kprobe_dtp;
+
+    size_t i;
+
+    for (i = 0; i < PPTMASK; ++i)
+    {
+        SLIST_FOREACH(kprobe_dtp, &dtpf_entry[i], kprobe_next) {
+            dtp = kprobe_dtp->dtp;
+
+            mtx_enter(&dtp->dtp_mtx);
+            dtp->dtp_ref--;
+
+            if (dtp->dtp_ref == 0) {
+                unsigned s;
+                s = intr_disable();
+                db_write_bytes(dtp->dtp_addr, SSF_SIZE, &patch);
+                intr_restore(s);
+            }
+
+            mtx_leave(&dtp->dtp_mtx);
+        }
+
+    }
+
+#else
+#endif
+}
diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c
index 60d59af54f7..4cd48b5f676 100644
--- a/sys/dev/dt/dt_prov_profile.c
+++ b/sys/dev/dt/dt_prov_profile.c
@@ -31,14 +31,15 @@ struct dt_probe    *dtpp_interval;        /* global periodic probe */

 int    dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
         struct dt_pcb_list *, struct dtioc_req *);
-void    dt_prov_profile_enter(struct dt_provider *, ...);
-void    dt_prov_interval_enter(struct dt_provider *, ...);
+int    dt_prov_profile_enter(struct dt_provider *, ...);
+int    dt_prov_interval_enter(struct dt_provider *, ...);

 struct dt_provider dt_prov_profile = {
     .dtpv_name    = "profile",
     .dtpv_alloc    = dt_prov_profile_alloc,
     .dtpv_enter    = dt_prov_profile_enter,
     .dtpv_leave    = NULL,
+    .dtpv_desalloc    = NULL,
 };

 struct dt_provider dt_prov_interval = {
@@ -46,6 +47,7 @@ struct dt_provider dt_prov_interval = {
     .dtpv_alloc    = dt_prov_profile_alloc,
     .dtpv_enter    = dt_prov_interval_enter,
     .dtpv_leave    = NULL,
+    .dtpv_desalloc    = NULL,
 };

 int
@@ -114,7 +116,7 @@ dt_prov_profile_fire(struct dt_pcb *dp)
     dp->dp_nticks = 0;
 }

-void
+int
 dt_prov_profile_enter(struct dt_provider *dtpv, ...)
 {
     struct cpu_info *ci = curcpu();
@@ -130,9 +132,10 @@ dt_prov_profile_enter(struct dt_provider *dtpv, ...)
         dt_prov_profile_fire(dp);
     }
     smr_read_leave();
+    return 0;
 }

-void
+int
 dt_prov_interval_enter(struct dt_provider *dtpv, ...)
 {
     struct dt_pcb *dp;
@@ -144,4 +147,5 @@ dt_prov_interval_enter(struct dt_provider *dtpv, ...)
         dt_prov_profile_fire(dp);
     }
     smr_read_leave();
+    return 0;
 }
diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c
index d7e10542a0c..08d12940236 100644
--- a/sys/dev/dt/dt_prov_static.c
+++ b/sys/dev/dt/dt_prov_static.c
@@ -25,12 +25,13 @@

 int    dt_prov_static_alloc(struct dt_probe *, struct dt_softc *,
         struct dt_pcb_list *, struct dtioc_req *);
-void    dt_prov_static_hook(struct dt_provider *, ...);
+int    dt_prov_static_hook(struct dt_provider *, ...);

 struct dt_provider dt_prov_static = {
     .dtpv_name = "tracepoint",
     .dtpv_alloc = dt_prov_static_alloc,
     .dtpv_enter = dt_prov_static_hook,
+    .dtpv_desalloc = NULL,
 };

 /*
@@ -120,7 +121,7 @@ dt_prov_static_alloc(struct dt_probe *dtp, struct dt_softc 
*sc,
     return 0;
 }

-void
+int
 dt_prov_static_hook(struct dt_provider *dtpv, ...)
 {
     struct dt_probe *dtp;
@@ -146,13 +147,14 @@ dt_prov_static_hook(struct dt_provider *dtpv, ...)
         if (dtev == NULL)
             continue;

-        dtev->dtev_sysargs[0] = args[0];
-        dtev->dtev_sysargs[1] = args[1];
-        dtev->dtev_sysargs[2] = args[2];
-        dtev->dtev_sysargs[3] = args[3];
-        dtev->dtev_sysargs[4] = args[4];
+        dtev->dtev_args[0] = args[0];
+        dtev->dtev_args[1] = args[1];
+        dtev->dtev_args[2] = args[2];
+        dtev->dtev_args[3] = args[3];
+        dtev->dtev_args[4] = args[4];

         dt_pcb_ring_consume(dp, dtev);
     }
     smr_read_leave();
+    return 1;
 }
diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c
index b2e82551929..0c1c23b290a 100644
--- a/sys/dev/dt/dt_prov_syscall.c
+++ b/sys/dev/dt/dt_prov_syscall.c
@@ -37,7 +37,7 @@ unsigned int      dtps_nsysent = SYS_MAXSYSCALL;

 int    dt_prov_syscall_alloc(struct dt_probe *, struct dt_softc *,
         struct dt_pcb_list *, struct dtioc_req *);
-void    dt_prov_syscall_entry(struct dt_provider *, ...);
+int    dt_prov_syscall_entry(struct dt_provider *, ...);
 void    dt_prov_syscall_return(struct dt_provider *, ...);

 struct dt_provider dt_prov_syscall = {
@@ -45,6 +45,7 @@ struct dt_provider dt_prov_syscall = {
     .dtpv_alloc    = dt_prov_syscall_alloc,
     .dtpv_enter    = dt_prov_syscall_entry,
     .dtpv_leave    = dt_prov_syscall_return,
+        .dtpv_desalloc = NULL,
 };

 int
@@ -119,7 +120,7 @@ dt_prov_syscall_alloc(struct dt_probe *dtp, struct dt_softc 
*sc,
     return 0;
 }

-void
+int
 dt_prov_syscall_entry(struct dt_provider *dtpv, ...)
 {
     struct dt_probe *dtp;
@@ -136,14 +137,14 @@ dt_prov_syscall_entry(struct dt_provider *dtpv, ...)
     args = va_arg(ap, register_t*);
     va_end(ap);

-    KASSERT((argsize / sizeof(register_t)) <= DTMAXSYSARGS);
+    KASSERT((argsize / sizeof(register_t)) <= DTMAXFUNCARGS);

     if (sysnum < 0 || sysnum >= dtps_nsysent)
-        return;
+        return 0;

     dtp = dtps_entry[sysnum];
     if (!dtp->dtp_recording)
-        return;
+        return 0;

     smr_read_enter();
     SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
@@ -154,11 +155,12 @@ dt_prov_syscall_entry(struct dt_provider *dtpv, ...)
             continue;

         if (ISSET(dp->dp_evtflags, DTEVT_FUNCARGS))
-            memcpy(dtev->dtev_sysargs, args, argsize);
+            memcpy(dtev->dtev_args, args, argsize);

         dt_pcb_ring_consume(dp, dtev);
     }
     smr_read_leave();
+    return 0;
 }

 void
@@ -196,13 +198,13 @@ dt_prov_syscall_return(struct dt_provider *dtpv, ...)
             continue;

         if (error) {
-            dtev->dtev_sysretval[0] = -1;
-            dtev->dtev_sysretval[1] = 0;
-            dtev->dtev_syserror = error;
+            dtev->dtev_retval[0] = -1;
+            dtev->dtev_retval[1] = 0;
+            dtev->dtev_error = error;
         } else {
-            dtev->dtev_sysretval[0] = retval[0];
-            dtev->dtev_sysretval[1] = retval[1];
-            dtev->dtev_syserror = 0;
+            dtev->dtev_retval[0] = retval[0];
+            dtev->dtev_retval[1] = retval[1];
+            dtev->dtev_error = 0;
         }

         dt_pcb_ring_consume(dp, dtev);
diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h
index 3e7985692eb..6d2dd1a8110 100644
--- a/sys/dev/dt/dtvar.h
+++ b/sys/dev/dt/dtvar.h
@@ -34,9 +34,9 @@
 #define DTMAXCOMLEN    16

 /*
- * Maximum number of arguments passed to a syscall.
+ * Maximum number of arguments passed to a function.
  */
-#define DTMAXSYSARGS    10
+#define DTMAXFUNCARGS    10

 /*
  * Event state: where to store information when a probe fires.
@@ -54,15 +54,15 @@ struct dt_evt {
     struct stacktrace     dtev_kstack;    /* kernel stack frame */
     char            dtev_comm[DTMAXCOMLEN+1]; /* current pr. name */
     union {
-        register_t        E_entry[DTMAXSYSARGS];
+        register_t        E_entry[DTMAXFUNCARGS];
         struct {
             register_t        __retval[2];
             int            __error;
         } E_return;
-    } _sys;
-#define dtev_sysargs    _sys.E_entry        /* syscall args. */
-#define dtev_sysretval    _sys.E_return.__retval    /* syscall retval */
-#define dtev_syserror    _sys.E_return.__error    /* syscall error */
+    } _args;
+#define dtev_args    _args.E_entry            /* function args. */
+#define dtev_retval    _args.E_return.__retval    /* function retval */
+#define dtev_error    _args.E_return.__error    /* function error */

 };

@@ -132,6 +132,7 @@ struct dtioc_stat {

 #define DTIOCRECORD    _IOW('D', 3, int)
 #define DTIOCPRBENABLE    _IOW('D', 4, struct dtioc_req)
+#define DTIOCPRBDISABLE     _IOW('D', 5, struct dtioc_req)


 #ifdef _KERNEL
@@ -213,12 +214,16 @@ struct dt_probe {
     const char        *dtp_func;    /* [I] probe function */
     const char        *dtp_name;    /* [I] probe name */
     uint32_t         dtp_pbn;    /* [I] unique ID */
-    volatile uint32_t     dtp_recording;    /* [D] is it recording? */
-    uint8_t             dtp_nargs;    /* [I] # of arguments */
+    volatile uint32_t     dtp_recording;    /* [d] is it recording? */
+
+    struct mutex     dtp_mtx;
+    unsigned         dtp_ref; /* [m] number of pcb referencing this probe*/

     /* Provider specific fields. */
-    int             dtp_sysnum;    /* [I] related # of syscall */
-    const char        *dtp_argtype[5];/* [I] type of arguments */
+    int            dtp_sysnum;    /* [I] related # of syscall */
+    const char    *dtp_argtype[5];/* [I] type of arguments */
+    int            dtp_nargs;    /* [I] # of arguments */
+    vaddr_t     dtp_addr; /* [I] address of breakpint */
 };


@@ -231,13 +236,18 @@ struct dt_provider {

     int        (*dtpv_alloc)(struct dt_probe *, struct dt_softc *,
                 struct dt_pcb_list *, struct dtioc_req *);
-    void        (*dtpv_enter)(struct dt_provider *, ...);
+    int        (*dtpv_enter)(struct dt_provider *, ...);
     void        (*dtpv_leave)(struct dt_provider *, ...);
+    int        (*dtpv_desalloc)(struct dt_probe *, struct dt_softc *,
+                struct dtioc_req *);
 };

+extern struct dt_provider dt_prov_kprobe;
+
 int         dt_prov_profile_init(void);
 int         dt_prov_syscall_init(void);
 int         dt_prov_static_init(void);
+int         dt_prov_kprobe_init(void);

 struct dt_probe *dt_dev_alloc_probe(const char *, const char *,
             struct dt_provider *);
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
index 1de0726ab95..3f8d59d7832 100644
--- a/sys/kern/subr_prof.c
+++ b/sys/kern/subr_prof.c
@@ -70,10 +70,6 @@ prof_init(void)
     char *cp;
     int size;

-#if !defined(GPROF) && defined(DDBPROF)
-    db_prof_init();
-#endif
-
     /*
      * Round lowpc and highpc to multiples of the density we're using
      * so the rest of the scaling (here and in gprof) stays in ints.
diff --git a/usr.sbin/btrace/btrace.c b/usr.sbin/btrace/btrace.c
index 7b77ceb1366..fabbb4f78c0 100644
--- a/usr.sbin/btrace/btrace.c
+++ b/usr.sbin/btrace/btrace.c
@@ -498,19 +498,24 @@ rules_teardown(int fd)
     struct bt_rule *r, *rend = NULL;
     int dokstack = 0, off = 0;

+
     if (g_nprobes > 0) {
         if (ioctl(fd, DTIOCRECORD, &off))
             err(1, "DTIOCRECORD");
     }

+
     TAILQ_FOREACH(r, &g_rules, br_next) {
+        dtrq = r->br_cookie;
         if (r->br_type != B_RT_PROBE) {
             if (r->br_type == B_RT_END)
                 rend = r;
             continue;
-        }
+        } else {
+            if (ioctl(fd, DTIOCPRBDISABLE, dtrq))
+                err(1, "DTIOCPRBDISABLE");
+        }

-        dtrq = r->br_cookie;
         if (dtrq->dtrq_evtflags & DTEVT_KSTACK)
             dokstack = 1;
     }
@@ -657,8 +662,7 @@ builtin_arg(struct dt_evt *dtev, enum bt_argtype dat)
     static char buf[sizeof("18446744073709551615")]; /* UINT64_MAX */

     snprintf(buf, sizeof(buf) - 1, "%lu",
-        dtev->dtev_sysargs[dat - B_AT_BI_ARG0]);
-
+            dtev->dtev_args[dat - B_AT_BI_ARG0]);
     return buf;
 }

@@ -1088,7 +1092,7 @@ ba2str(struct bt_arg *ba, struct dt_evt *dtev)
         str = builtin_arg(dtev, ba->ba_type);
         break;
     case B_AT_BI_RETVAL:
-        snprintf(buf, sizeof(buf) - 1, "%ld", (long)dtev->dtev_sysretval[0]);
+        snprintf(buf, sizeof(buf) - 1, "%ld", (long)dtev->dtev_retval[0]);
         str = buf;
         break;
     case B_AT_MAP:
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index dd2dfde3e3b..3a0a58ba3ac 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -188,10 +188,11 @@ INTRENTRY_LABEL(trap03):
        sti
        cld
        SMAP_CLAC
-       movq    %rsp, %rdi
-       call    _C_LABEL(db_prof_hook)
-       cmpl    $1, %eax
-       jne     .Lreal_kern_trap
+       leaq    _C_LABEL(dt_prov_kprobe), %rdi
+       movq    %rsp, %rsi
+       call    _C_LABEL(dt_prov_kprobe_hook)
+       cmpl    $0, %eax
+       je     .Lreal_kern_trap
 
        cli
        movq    TF_RDI(%rsp),%rdi
@@ -210,6 +211,11 @@ INTRENTRY_LABEL(trap03):
        movq    TF_R11(%rsp),%r11
        /* %rax restored below, after being used to shift the stack */
 
+       cmpl    $2, %eax
+       je      .Lemulate_ret
+
+.Lemulate_push_rbp:
+
        /*
         * We are returning from a probe trap so we need to fix the
         * stack layout and emulate the patched instruction.
@@ -217,6 +223,9 @@ INTRENTRY_LABEL(trap03):
         */
        subq    $16, %rsp
 
+       movq    (TF_RAX + 16)(%rsp), %rax
+       movq    %rax, TF_RAX(%rsp)
+
        /* Shift hardware-saved registers. */
        movq    (TF_RIP + 16)(%rsp), %rax
        movq    %rax, TF_RIP(%rsp)
@@ -237,7 +246,20 @@ INTRENTRY_LABEL(trap03):
 
        /* Finally restore %rax */
        movq    (TF_RAX + 16)(%rsp),%rax
+       jmp .ret_int3
+
+.Lemulate_ret:
+
+       /* Store a new return address in %rip */
+       movq    TF_RSP(%rsp), %rax
+       movq    (%rax), %rax
+       movq    %rax, TF_RIP(%rsp)
+       addq    $8, TF_RSP(%rsp)
+
+       /* Finally restore %rax */
+       movq    (TF_RAX)(%rsp),%rax
 
+.ret_int3:
        addq    $TF_RIP,%rsp
        iretq
 #endif /* !defined(GPROF) && defined(DDBPROF) */
diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s
index 0c5607fe38a..e2d43b47eb3 100644
--- a/sys/arch/i386/i386/locore.s
+++ b/sys/arch/i386/i386/locore.s
@@ -205,7 +205,8 @@ INTRENTRY_LABEL(label):     /* from kernel */       ; \
 #define        INTRFASTEXIT \
        jmp     intr_fast_exit
 
-#define        INTR_FAKE_TRAP  0xbadabada
+#define        INTR_FAKE_TRAP_PUSH_RPB 0xbadabada
+#define        INTR_FAKE_TRAP_POP_RBP  0xbcbcbcbc
 
 /*
  * PTmap is recursive pagemap at top of virtual address space.
@@ -1259,17 +1260,32 @@ calltrap:
        jne     .Lreal_trap
 
        pushl   %esp
-       call    _C_LABEL(db_prof_hook)
-       addl    $4,%esp
-       cmpl    $1,%eax
-       jne     .Lreal_trap
+       subl    $4, %esp
+       pushl   %eax
+       leal    _C_LABEL(dt_prov_kprobe), %eax
+       movl    %eax, 4(%esp)
+       popl    %eax
+       call    _C_LABEL(dt_prov_kprobe_hook)
+       addl    $8,%esp
+       cmpl    $0,%eax
+       je      .Lreal_trap
 
        /*
         * Abuse the error field to indicate that INTRFASTEXIT needs
         * to emulate the patched instruction.
         */
-       movl    $INTR_FAKE_TRAP, TF_ERR(%esp)
-       jz      .Lalltraps_check_asts
+       cmpl $1, %eax
+       je .Lset_emulate_push_rbp
+       
+       cmpl $2, %eax
+       je .Lset_emulate_ret
+
+.Lset_emulate_push_rbp:
+       movl    $INTR_FAKE_TRAP_PUSH_RPB, TF_ERR(%esp)
+       jmp     .Lalltraps_check_asts
+.Lset_emulate_ret:
+       movl    $INTR_FAKE_TRAP_POP_RBP, TF_ERR(%esp)
+       jmp     .Lalltraps_check_asts
 .Lreal_trap:
 #endif /* !defined(GPROF) && defined(DDBPROF) */
        pushl   %esp
@@ -1298,8 +1314,10 @@ calltrap:
         * The code below does that by trashing %eax, so it MUST be
         * restored afterward.
         */
-       cmpl    $INTR_FAKE_TRAP, TF_ERR(%esp)
-       je      .Lprobe_fixup
+       cmpl    $INTR_FAKE_TRAP_PUSH_RPB, TF_ERR(%esp)
+       je      .Lprobe_fixup_push_rbp
+       cmpl    $INTR_FAKE_TRAP_POP_RBP, TF_ERR(%esp)
+       je      .Lprobe_fixup_pop_rbp
 #endif /* !defined(GPROF) && defined(DDBPROF) */
 #ifndef DIAGNOSTIC
        INTRFASTEXIT
@@ -1327,7 +1345,7 @@ spl_lowered:
 
        .text
 #if !defined(GPROF) && defined(DDBPROF)
-.Lprobe_fixup:
+.Lprobe_fixup_push_rbp:
        /* Restore all register unwinding the stack. */
        INTR_RESTORE_ALL
 
@@ -1352,6 +1370,26 @@ spl_lowered:
 
        popl    %eax
        iret
+.Lprobe_fixup_pop_rbp:
+    /* Restore all register unwinding the stack. */
+    INTR_RESTORE_ALL
+
+    movl       %eax,0(%esp)
+    
+    /* pop %ebp */
+    movl    20(%esp), %ebp
+       /* Shift hardware-saved registers: eflags, cs, eip */
+    movl    16(%esp), %eax
+    movl    %eax, 20(%esp)
+    movl    12(%esp), %eax
+    movl    %eax, 16(%esp)
+    movl    8(%esp), %eax 
+    movl    %eax, 12(%esp)
+
+    /* Pop eax and restore the stack pointer */
+    popl    %eax
+    addl    $8, %esp
+    iret
 #endif /* !defined(GPROF) && defined(DDBPROF) */
 
        .text
diff --git a/sys/conf/GENERIC b/sys/conf/GENERIC
index 315238ee034..7f708169673 100644
--- a/sys/conf/GENERIC
+++ b/sys/conf/GENERIC
@@ -4,7 +4,7 @@
 #      GENERIC kernel
 
 option         DDB             # in-kernel debugger
-#option                DDBPROF         # ddb(4) based profiling
+option         DDBPROF         # ddb(4) based profiling
 #option                DDB_SAFE_CONSOLE # allow break into ddb during boot
 #makeoptions   DEBUG=""        # do not compile full symbol table
 #makeoptions   PROF="-pg"      # build profiled kernel
@@ -82,7 +82,7 @@ pseudo-device msts    1       # MSTS line discipline
 pseudo-device  endrun  1       # EndRun line discipline
 pseudo-device  vnd     4       # vnode disk devices
 pseudo-device  ksyms   1       # kernel symbols device
-#pseudo-device dt              # Dynamic Tracer
+pseudo-device  dt      1   # Dynamic Tracer
 
 # clonable devices
 pseudo-device  bpfilter        # packet filter
diff --git a/sys/conf/files b/sys/conf/files
index f199715a0d4..f3b69360178 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -622,6 +622,7 @@ file        dev/dt/dt_dev.c                 dt      
needs-flag
 file   dev/dt/dt_prov_profile.c        dt
 file   dev/dt/dt_prov_syscall.c        dt
 file   dev/dt/dt_prov_static.c         dt
+file   dev/dt/dt_prov_kprobe.c         dt
 
 # XXX machine-independent SCSI files should live somewhere here, maybe
 
diff --git a/sys/ddb/db_prof.c b/sys/ddb/db_prof.c
index 7d8190f41bc..8b3e52b8c9c 100644
--- a/sys/ddb/db_prof.c
+++ b/sys/ddb/db_prof.c
@@ -43,7 +43,6 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
@@ -57,170 +56,56 @@
 #include <ddb/db_access.h> /* for db_write_bytes() */
 #include <ddb/db_sym.h>
 
-extern char etext[];
-
-struct prof_probe {
-       const char              *pp_name;
-       Elf_Sym                 *pp_symb;
-       SLIST_ENTRY(prof_probe)  pp_next;
-       vaddr_t                  pp_inst;
-       int                      pp_on;
-};
+#include "dt.h" /* for NDT */
 
-#define PPTSIZE                PAGE_SIZE
-#define        PPTMASK         ((PPTSIZE / sizeof(struct prof_probe)) - 1)
-#define INSTTOIDX(inst)        ((((unsigned long)(inst)) >> 4) & PPTMASK)
-SLIST_HEAD(, prof_probe) *pp_table;
+#if NDT > 0
+#include <dev/dt/dtvar.h>
+#endif
 
 extern int db_profile;                 /* Allow dynamic profiling */
 int db_prof_on;                                /* Profiling state On/Off */
 
-vaddr_t db_get_pc(struct trapframe *);
-vaddr_t db_get_probe_addr(struct trapframe *);
-
-void db_prof_forall(Elf_Sym *, char *, char *, int, void *);
-void db_prof_count(unsigned long, unsigned long);
-
-void
-db_prof_init(void)
-{
-       unsigned long nentries;
-
-       pp_table = malloc(PPTSIZE, M_TEMP, M_NOWAIT|M_ZERO);
-       if (pp_table == NULL)
-               return;
-
-       db_elf_sym_forall(db_prof_forall, &nentries);
-       printf("ddb probe table references %lu entry points\n", nentries);
-}
-
-void
-db_prof_forall(Elf_Sym *sym, char *name, char *suff, int pre, void *xarg)
-{
-       Elf_Sym *symb = sym;
-       unsigned long *nentries = xarg;
-       struct prof_probe *pp;
-       vaddr_t inst;
-
-       if (ELF_ST_TYPE(symb->st_info) != STT_FUNC)
-               return;
-
-       inst = symb->st_value;
-       if (inst < KERNBASE || inst >= (vaddr_t)&etext)
-               return;
-
-       if (*((uint8_t *)inst) != SSF_INST)
-               return;
-
-       if (strncmp(name, "db_", 3) == 0 || strncmp(name, "trap", 4) == 0)
-               return;
-
-#ifdef __i386__
-       /* Avoid a recursion in db_write_text(). */
-       if (strncmp(name, "pmap_pte", 8) == 0)
-               return;
-#endif
-
-       pp = malloc(sizeof(struct prof_probe), M_TEMP, M_NOWAIT|M_ZERO);
-       if (pp == NULL)
-               return;
+void dt_prov_kprobe_patch_all_entry(void);
+void dt_prov_kprobe_depatch_all_entry(void);
 
-       pp->pp_name = name;
-       pp->pp_inst = inst;
-       pp->pp_symb = symb;
-
-       SLIST_INSERT_HEAD(&pp_table[INSTTOIDX(pp->pp_inst)], pp, pp_next);
-
-       (*nentries)++;
-}
+vaddr_t db_get_probe_addr(struct trapframe *);
+vaddr_t db_get_pc(struct trapframe *);
 
 int
 db_prof_enable(void)
 {
-#if defined(__amd64__) || defined(__i386__)
-       struct prof_probe *pp;
-       uint8_t patch = BKPT_INST;
-       unsigned long s;
-       int i;
-
-       if (!db_profile)
-               return EPERM;
-
-       if (pp_table == NULL)
-               return ENOENT;
+#if NDT > 0
+       if (!db_profile)
+               return EPERM;
 
-       KASSERT(BKPT_SIZE == SSF_SIZE);
-
-       s = intr_disable();
-       for (i = 0; i < (PPTSIZE / sizeof(*pp)); i++) {
-               SLIST_FOREACH(pp, &pp_table[i], pp_next) {
-                       pp->pp_on = 1;
-                       db_write_bytes(pp->pp_inst, BKPT_SIZE, &patch);
-               }
-       }
-       intr_restore(s);
-
-       db_prof_on = 1;
-
-       return 0;
+        dt_prov_kprobe_patch_all_entry();
+        db_prof_on = 1;
+        return 0;
 #else
        return ENOENT;
-#endif
+#endif /* NDT > 0 */
 }
 
 void
 db_prof_disable(void)
 {
-       struct prof_probe *pp;
-       uint8_t patch = SSF_INST;
-       unsigned long s;
-       int i;
-
-       db_prof_on = 0;
-
-       s = intr_disable();
-       for (i = 0; i < (PPTSIZE / sizeof(*pp)); i++) {
-               SLIST_FOREACH(pp, &pp_table[i], pp_next) {
-                       db_write_bytes(pp->pp_inst, SSF_SIZE, &patch);
-                       pp->pp_on = 0;
-               }
-       }
-       intr_restore(s);
-}
-
-int
-db_prof_hook(struct trapframe *frame)
-{
-       struct prof_probe *pp;
-       vaddr_t pc, inst;
-
-       if (pp_table == NULL)
-               return 0;
-
-       pc = db_get_pc(frame);
-       inst = db_get_probe_addr(frame);
-
-       SLIST_FOREACH(pp, &pp_table[INSTTOIDX(inst)], pp_next) {
-               if (pp->pp_on && pp->pp_inst == inst) {
-                       if (db_prof_on)
-                               db_prof_count(pc, inst);
-                       return 1;
-               }
-       }
-
-       return 0;
+#if NDT > 0
+    db_prof_on = 0;
+    dt_prov_kprobe_depatch_all_entry();
+#endif /* NDT > 0 */
 }
 
 /*
  * Equivalent to mcount(), must be called with interrupt disabled.
  */
 void
-db_prof_count(unsigned long frompc, unsigned long selfpc)
+db_prof_count(struct trapframe *frame)
 {
        unsigned short *frompcindex;
        struct tostruct *top, *prevtop;
        struct gmonparam *p;
        long toindex;
+    unsigned long frompc, selfpc;
 
        if ((p = curcpu()->ci_gmon) == NULL)
                return;
@@ -232,6 +117,8 @@ db_prof_count(unsigned long frompc, unsigned long selfpc)
        if (p->state != GMON_PROF_ON)
                return;
 
+    frompc = db_get_pc(frame);
+    selfpc = db_get_probe_addr(frame);
        /*
         * check that frompcindex is a reasonable pc value.
         * for example: signal catchers get called from the stack,
@@ -241,6 +128,7 @@ db_prof_count(unsigned long frompc, unsigned long selfpc)
        if (frompc > p->textsize)
                return;
 
+
 #if (HASHFRACTION & (HASHFRACTION - 1)) == 0
        if (p->hashfraction == HASHFRACTION)
                frompcindex =
diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c
index 51dc5e1cd81..64889ac8225 100644
--- a/sys/dev/dt/dt_dev.c
+++ b/sys/dev/dt/dt_dev.c
@@ -122,7 +122,7 @@ int dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat 
*);
 int    dt_ioctl_record_start(struct dt_softc *);
 void   dt_ioctl_record_stop(struct dt_softc *);
 int    dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
-void   dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
+int    dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
 
 int    dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);
 
@@ -136,6 +136,7 @@ dtattach(struct device *parent, struct device *self, void 
*aux)
        dt_nprobes += dt_prov_profile_init();
        dt_nprobes += dt_prov_syscall_init();
        dt_nprobes += dt_prov_static_init();
+       dt_nprobes += dt_prov_kprobe_init();
 
        printf("dt: %u probes\n", dt_nprobes);
 }
@@ -276,6 +277,7 @@ dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, 
struct proc *p)
                return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
        case DTIOCRECORD:
        case DTIOCPRBENABLE:
+       case DTIOCPRBDISABLE:
                /* root only ioctl(2) */
                break;
        default:
@@ -296,6 +298,9 @@ dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, 
struct proc *p)
        case DTIOCPRBENABLE:
                error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
                break;
+       case DTIOCPRBDISABLE:
+               error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr);
+               break;
        default:
                KASSERT(0);
        }
@@ -479,6 +484,36 @@ dt_ioctl_probe_enable(struct dt_softc *sc, struct 
dtioc_req *dtrq)
        return 0;
 }
 
+int
+dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq)
+{
+       struct dt_probe *dtp;
+       int error;
+
+       KASSERT(suser(curproc) == 0);
+       if (!dtioc_req_isvalid(dtrq))
+               return EINVAL;
+
+       SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
+               if (dtp->dtp_pbn == dtrq->dtrq_pbn)
+                       break;
+       }
+       if (dtp == NULL)
+               return ENOENT;
+
+       if (dtp->dtp_prov->dtpv_desalloc)
+       {
+               error = dtp->dtp_prov->dtpv_desalloc(dtp, sc, dtrq);
+               if (error)
+                       return error;
+       }
+
+       DPRINTF("dt%d: pid %d desalloc\n", sc->ds_unit, sc->ds_pid,
+                       dtrq->dtrq_pbn);
+
+       return 0;
+}
+
 struct dt_probe *
 dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider 
*dtpv)
 {
@@ -493,6 +528,9 @@ dt_dev_alloc_probe(const char *func, const char *name, 
struct dt_provider *dtpv)
        dtp->dtp_func = func;
        dtp->dtp_name = name;
        dtp->dtp_sysnum = -1;
+       dtp->dtp_ref = 0;
+
+       mtx_init(&dtp->dtp_mtx, IPL_HIGH);
 
        return dtp;
 }
diff --git a/sys/dev/dt/dt_prov_kprobe.c b/sys/dev/dt/dt_prov_kprobe.c
new file mode 100644
index 00000000000..bd7c06e5680
--- /dev/null
+++ b/sys/dev/dt/dt_prov_kprobe.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 2020 Tom Rollet <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/atomic.h>
+#include <sys/exec_elf.h>
+
+#include <ddb/db_elf.h>
+#include <machine/db_machdep.h>
+#include <ddb/db_extern.h>
+#include <ddb/db_access.h>
+#include <ddb/db_sym.h>
+#include <ddb/db_interface.h>
+
+#include <dev/dt/dtvar.h>
+
+int
+dt_prov_kprobe_alloc(struct dt_probe *dtp, struct dt_softc *sc,
+               struct dt_pcb_list *plist, struct dtioc_req *dtrq);
+int dt_prov_kprobe_hook(struct dt_provider *dtpv, ...);
+int
+dt_prov_kprobe_desalloc(struct dt_probe *dtp, struct dt_softc *sc,
+               struct dtioc_req *dtrq);
+
+void db_prof_count(struct trapframe *frame);
+
+
+struct kprobe_probe
+{
+       struct dt_probe* dtp; 
+       SLIST_ENTRY(kprobe_probe) kprobe_next;
+};
+
+uint32_t hash_tmp( uint32_t a)
+{
+       a = (a+0x7ed55d16) + (a<<12);
+       a = (a^0xc761c23c) ^ (a>>19);
+       a = (a+0x165667b1) + (a<<5);
+       a = (a+0xd3a2646c) ^ (a<<9);
+       a = (a+0xfd7046c5) + (a<<3);
+       a = (a^0xb55a4f09) ^ (a>>16);
+       return a;
+}
+
+#define PPTSIZE                PAGE_SIZE * 30
+#define        PPTMASK         ((PPTSIZE / sizeof(struct kprobe_probe)) - 1)
+#define INSTTOIDX(inst)        ( hash_tmp(inst) & PPTMASK)
+
+SLIST_HEAD(, kprobe_probe) *dtpf_entry;
+SLIST_HEAD(, kprobe_probe) *dtpf_return;
+int nb_probes_entry = 0;
+int nb_probes_return = 0;
+
+
+#define DTEVT_PROV_KPROBE (DTEVT_COMMON|DTEVT_FUNCARGS)
+
+#define KPROBE_ENTRY "entry"
+#define KPROBE_RETURN "return"
+
+#if defined(__amd64__) 
+#define KPROBE_RETGUARD_MOV_1 0x4c
+#define KPROBE_RETGUARD_MOV_2 0x8b
+#define KPROBE_RETGUARD_MOV_3 0x1d
+
+#define KPROBE_RETGUARD_MOV_SIZE 7
+
+#define KPROBE_RETGUARD_XOR_1 0x4c
+#define KPROBE_RETGUARD_XOR_2 0x33
+#define KPROBE_RETGUARD_XOR_3 0x1c
+
+#define KPROBE_RETGUARD_XOR_SIZE 4 
+
+#define RET 0xc3
+#define RET_SIZE 1
+
+#elif defined(__i386__)
+#define POP_RBP 0x5d
+#define POP_RBP_SIZE 1
+#endif
+
+struct dt_provider dt_prov_kprobe = {
+       .dtpv_name    = "kprobe",
+       .dtpv_alloc   = dt_prov_kprobe_alloc,
+       .dtpv_enter   = dt_prov_kprobe_hook,
+       .dtpv_leave   = NULL,
+       .dtpv_desalloc = dt_prov_kprobe_desalloc,
+};
+
+extern db_symtab_t db_symtab;
+extern char __kutext_end[];
+extern int db_prof_on;
+
+/* Initialize all entry and return probes and store them in global arrays */
+       int
+dt_prov_kprobe_init(void)
+{
+#if defined(__amd64__) || defined(__i386__)
+       struct dt_probe *dtp;
+       struct kprobe_probe *kprobe_dtp;
+       Elf_Sym *symp, *symtab_start, *symtab_end;
+       char *strtab, *name;
+       vaddr_t inst, limit;
+       int nb_sym, nb_probes;
+
+
+       nb_sym = (db_symtab.end - db_symtab.start) / sizeof (Elf_Sym);
+       nb_probes = nb_probes_entry = nb_probes_return = 0;
+
+       dtpf_entry = malloc(PPTSIZE, M_DT, M_NOWAIT|M_ZERO);
+       if (dtpf_entry == NULL)
+               goto end;
+
+       dtpf_return = malloc(PPTSIZE, M_DT, M_NOWAIT|M_ZERO);
+       if (dtpf_return == NULL)
+               goto end;
+
+       db_symtab_t *stab = &db_symtab;
+
+       symtab_start = STAB_TO_SYMSTART(stab);
+       symtab_end = STAB_TO_SYMEND(stab);
+
+       strtab = db_elf_find_strtab(stab);
+
+       for (symp = symtab_start; symp < symtab_end; symp++) {
+               if (ELF_ST_TYPE(symp->st_info) != STT_FUNC)
+                       continue;
+
+               inst = symp->st_value;
+               name = strtab + symp->st_name;
+               limit = symp->st_value + symp->st_size;
+
+               /* Filter function that are not mapped in memory */
+               if (inst < KERNBASE || inst >= (vaddr_t)&__kutext_end)
+                       continue;
+
+               /* Remove some function to avoid recursive tracing */
+               if (strncmp(name, "dt_", 3) == 0 || strncmp(name, "trap", 4) == 
0 ||
+                               strncmp(name, "db_", 3) == 0)
+                       continue;
+
+#if defined(__amd64__)
+               /* Find if there is a retguard, if so move the inst pointer to 
the later 'push rbp' */
+               if (*((uint8_t *)inst) != SSF_INST) {
+                       /* No retguards in i386 */
+                       if (((uint8_t *)inst)[0] != KPROBE_RETGUARD_MOV_1 ||
+                               ((uint8_t *)inst)[1] != KPROBE_RETGUARD_MOV_2 ||
+                               ((uint8_t *)inst)[2] != KPROBE_RETGUARD_MOV_3 ||
+                               ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE] != 
KPROBE_RETGUARD_XOR_1 ||
+                               ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + 1] 
!= KPROBE_RETGUARD_XOR_2 ||
+                               ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + 2] 
!= KPROBE_RETGUARD_XOR_3 ||
+                               ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + 
KPROBE_RETGUARD_XOR_SIZE] != SSF_INST)
+                               continue;
+                       inst = (vaddr_t)&(((uint8_t 
*)inst)[KPROBE_RETGUARD_MOV_SIZE + KPROBE_RETGUARD_XOR_SIZE]);
+               }
+#elif defined(__i386__)
+               if (*((uint8_t *)inst) != SSF_INST)
+                       continue;
+#endif
+
+               dtp = dt_dev_alloc_probe(name, KPROBE_ENTRY, &dt_prov_kprobe);
+               if (dtp == NULL)
+                       goto end;
+
+               kprobe_dtp = malloc(sizeof(struct kprobe_probe), M_TEMP, 
M_NOWAIT|M_ZERO);
+               if (kprobe_dtp == NULL)
+                       goto end;
+               kprobe_dtp->dtp = dtp;
+
+               dtp->dtp_addr = inst;
+               dtp->dtp_nargs = db_ctf_func_numargs(symp);
+               dt_dev_register_probe(dtp);
+
+               SLIST_INSERT_HEAD(&dtpf_entry[INSTTOIDX(dtp->dtp_addr)], 
kprobe_dtp, kprobe_next);
+
+               nb_probes++;
+               nb_probes_entry++;
+
+
+               /*
+                *  Poor method to find the return point
+                *  => we would need a disassembler to find all return points
+                *  For now we start from the end of the function, iterate on
+                *  int3 inserted for retguard until we find a ret
+                */
+#if defined(__amd64__)
+               if (*(uint8_t *)(limit - 1) != RET)
+                       continue;
+               inst = limit - 1;
+#elif defined(__i386__)
+               /*
+                * Little temporary hack to find some return probe
+                *   => always int3 after 'pop %rpb; ret'
+                */
+               while(*((uint8_t *)inst) == 0xcc)
+                       (*(uint8_t *)inst) -= 1;
+               if (*(uint8_t *)(limit - 2) != POP_RBP)
+                       continue;
+               inst = limit - 2;
+#endif
+
+               dtp = dt_dev_alloc_probe(name, KPROBE_RETURN, &dt_prov_kprobe);
+               if (dtp == NULL)
+                       goto end;
+
+               kprobe_dtp = malloc(sizeof(struct kprobe_probe), M_TEMP, 
M_NOWAIT|M_ZERO);
+               if (kprobe_dtp == NULL)
+                       goto end;
+               kprobe_dtp->dtp = dtp;
+
+               dtp->dtp_addr = inst;
+               dt_dev_register_probe(dtp);
+               SLIST_INSERT_HEAD(&dtpf_return[INSTTOIDX(dtp->dtp_addr)], 
kprobe_dtp, kprobe_next);
+               nb_probes++;
+               nb_probes_return++;
+       }
+#endif
+end:
+       printf("dt kprobe: entry: %d return: %d\n", nb_probes_entry, 
nb_probes_return);
+       return nb_probes;
+}
+
+       int
+dt_prov_kprobe_alloc(struct dt_probe *dtp, struct dt_softc *sc,
+               struct dt_pcb_list *plist, struct dtioc_req *dtrq)
+{
+#if defined(__amd64__) || defined(__i386__)
+       uint8_t patch = BKPT_INST;
+       struct dt_pcb *dp;
+       unsigned s;
+
+       dp = dt_pcb_alloc(dtp, sc);
+       if (dp == NULL)
+               return ENOMEM;
+
+       /* Patch only if it's first pcb referencing this probe */
+       mtx_enter(&dtp->dtp_mtx);
+       dtp->dtp_ref++;
+       assert(dtp->dtp_ref != 0);
+
+       if (dtp->dtp_ref == 1) {
+               s = intr_disable();
+               db_write_bytes(dtp->dtp_addr, BKPT_SIZE, &patch);
+               intr_restore(s);
+       }
+       mtx_leave(&dtp->dtp_mtx);
+
+       dp->dp_filter = dtrq->dtrq_filter;
+       dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_KPROBE;
+       TAILQ_INSERT_HEAD(plist, dp, dp_snext);
+       return 0;
+
+#else
+       return ENOENT;
+#endif
+}
+
+       int
+dt_prov_kprobe_desalloc(struct dt_probe *dtp, struct dt_softc *sc,
+               struct dtioc_req *dtrq)
+{
+#if defined(__amd64__) || defined(__i386__)
+       uint8_t patch;
+       int size;
+       unsigned s;
+
+       if (strcmp(dtp->dtp_name, KPROBE_ENTRY) == 0)
+       {
+               patch = SSF_INST;
+               size  = SSF_SIZE;
+       }
+       else if (strcmp(dtp->dtp_name, KPROBE_RETURN) == 0)
+       {
+#if defined(__amd64__)
+               patch = RET;
+               size  = RET_SIZE;
+#elif defined(__i386__)
+               patch = POP_RBP;
+               size  = POP_RBP_SIZE;
+#endif
+       }
+       else
+               assert(0 && "Trying to desalloc not yet implemented probe 
type");
+
+       mtx_enter(&dtp->dtp_mtx);
+       dtp->dtp_ref--;
+
+       if (dtp->dtp_ref == 0) {
+               s = intr_disable();
+               db_write_bytes(dtp->dtp_addr, size, &patch);
+               intr_restore(s);
+       }
+
+       mtx_leave(&dtp->dtp_mtx);
+
+       /* Desallocation of pcb is done by dt_pcb_purge while closing the dev */
+       return 0;
+
+#else
+       return ENOENT;
+#endif
+}
+
+       int
+dt_prov_kprobe_hook(struct dt_provider *dtpv, ...)
+{
+#if defined(__amd64__) || defined(__i386__)
+       struct dt_probe *dtp;
+       struct dt_pcb *dp;
+       va_list ap;
+       struct trapframe *tf;
+       int is_dt_bkpt = 0;
+       struct kprobe_probe *kprobe_dtp;
+       /* Arguments for entry probes */
+       vaddr_t *args;
+       size_t argsize;
+       /* Return values for return probes*/
+       int error;
+       register_t retval[2];
+
+       KASSERT(dtpv == &dt_prov_kprobe);
+
+       va_start(ap, dtpv);
+       tf = va_arg(ap, struct trapframe*);
+       va_end(ap);
+
+#if defined(__amd64__)
+       vaddr_t addr = tf->tf_rip - BKPT_SIZE;
+#elif defined(__i386)
+       vaddr_t addr = tf->tf_eip - BKPT_SIZE;
+#endif
+
+
+       SLIST_FOREACH(kprobe_dtp, &dtpf_entry[INSTTOIDX(addr)], kprobe_next) {
+               dtp = kprobe_dtp->dtp;
+
+               if (dtp->dtp_addr != addr)
+                       continue;
+
+               is_dt_bkpt = 1;
+               if (db_prof_on)
+                       db_prof_count(tf);
+
+               if (!dtp->dtp_recording)
+                       continue;
+
+               smr_read_enter();
+               SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
+                       struct dt_evt *dtev;
+
+                       dtev = dt_pcb_ring_get(dp, 0);
+                       if (dtev == NULL)
+                               continue;
+
+#if defined(__amd64__)
+                       args = (vaddr_t *)tf->tf_rdi;
+                       argsize = 6; /* Only 6 first args are passed on 
registers  */
+#elif defined(__i386)
+                       /* All args on stack */
+                       args = (vaddr_t *)(tf->tf_esp + 4);
+                       argsize = 10;
+#endif
+
+                       if (ISSET(dp->dp_evtflags, DTEVT_FUNCARGS))
+                               memcpy(dtev->dtev_args, args, argsize);
+
+                       dt_pcb_ring_consume(dp, dtev);
+               }
+               smr_read_leave();
+       }
+
+       if (is_dt_bkpt)
+               return is_dt_bkpt;
+
+       SLIST_FOREACH(kprobe_dtp, &dtpf_return[INSTTOIDX(addr)], kprobe_next) {
+               dtp = kprobe_dtp->dtp;
+
+               if (dtp->dtp_addr != addr)
+                       continue;
+
+               is_dt_bkpt = 2;
+
+               if (!dtp->dtp_recording)
+                       continue;
+
+               smr_read_enter();
+               SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
+                       struct dt_evt *dtev;
+
+                       dtev = dt_pcb_ring_get(dp, 0);
+                       if (dtev == NULL)
+                               continue;
+
+#if defined(__amd64__)
+                       retval[0] = tf->tf_rax;
+                       retval[1] = 0;
+                       error = 0;
+#elif defined(__i386)
+                       retval[0] = tf->tf_eax;
+                       retval[1] = 0;
+                       error = 0;
+#endif
+
+                       dtev->dtev_retval[0] = retval[0];
+                       dtev->dtev_retval[1] = retval[1];
+                       dtev->dtev_error = error;
+
+                       dt_pcb_ring_consume(dp, dtev);
+               }
+               smr_read_leave();
+       }
+       return is_dt_bkpt;
+#else
+       return 0;
+#endif
+}
+
+/* Function called by ddb to patch all functions without allocating 1 pcb per 
probe */
+void dt_prov_kprobe_patch_all_entry(void)
+{
+#if defined(__amd64__) || defined(__i386__)
+       uint8_t patch = BKPT_INST;
+       struct dt_probe *dtp;
+       struct kprobe_probe *kprobe_dtp;
+       size_t i;
+
+
+
+       for (i = 0; i < PPTMASK; ++i)
+       {
+               SLIST_FOREACH(kprobe_dtp, &dtpf_entry[i], kprobe_next) {
+                       dtp = kprobe_dtp->dtp;
+
+                       mtx_enter(&dtp->dtp_mtx);
+                       dtp->dtp_ref++;
+
+                       if (dtp->dtp_ref == 1) {
+                               unsigned s;
+                               s = intr_disable();
+                               db_write_bytes(dtp->dtp_addr, BKPT_SIZE, 
&patch);
+                               intr_restore(s);
+                       }
+
+                       mtx_leave(&dtp->dtp_mtx);
+               }
+       }
+#endif
+}
+
+/* Function called by ddb to patch all functions without allocating 1 pcb per 
probe */
+void dt_prov_kprobe_depatch_all_entry(void)
+{
+#if defined(__amd64__) || defined(__i386__)
+       uint8_t patch = SSF_INST;
+       struct dt_probe *dtp;
+       struct kprobe_probe *kprobe_dtp;
+
+       size_t i;
+
+       for (i = 0; i < PPTMASK; ++i)
+       {
+               SLIST_FOREACH(kprobe_dtp, &dtpf_entry[i], kprobe_next) {
+                       dtp = kprobe_dtp->dtp;
+
+                       mtx_enter(&dtp->dtp_mtx);
+                       dtp->dtp_ref--;
+
+                       if (dtp->dtp_ref == 0) {
+                               unsigned s;
+                               s = intr_disable();
+                               db_write_bytes(dtp->dtp_addr, SSF_SIZE, &patch);
+                               intr_restore(s);
+                       }
+
+                       mtx_leave(&dtp->dtp_mtx);
+               }
+
+       }
+
+#else
+#endif
+}
diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c
index 60d59af54f7..4cd48b5f676 100644
--- a/sys/dev/dt/dt_prov_profile.c
+++ b/sys/dev/dt/dt_prov_profile.c
@@ -31,14 +31,15 @@ struct dt_probe     *dtpp_interval;         /* global 
periodic probe */
 
 int    dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
            struct dt_pcb_list *, struct dtioc_req *);
-void   dt_prov_profile_enter(struct dt_provider *, ...);
-void   dt_prov_interval_enter(struct dt_provider *, ...);
+int    dt_prov_profile_enter(struct dt_provider *, ...);
+int    dt_prov_interval_enter(struct dt_provider *, ...);
 
 struct dt_provider dt_prov_profile = {
        .dtpv_name      = "profile",
        .dtpv_alloc     = dt_prov_profile_alloc,
        .dtpv_enter     = dt_prov_profile_enter,
        .dtpv_leave     = NULL,
+       .dtpv_desalloc  = NULL,
 };
 
 struct dt_provider dt_prov_interval = {
@@ -46,6 +47,7 @@ struct dt_provider dt_prov_interval = {
        .dtpv_alloc     = dt_prov_profile_alloc,
        .dtpv_enter     = dt_prov_interval_enter,
        .dtpv_leave     = NULL,
+       .dtpv_desalloc  = NULL,
 };
 
 int
@@ -114,7 +116,7 @@ dt_prov_profile_fire(struct dt_pcb *dp)
        dp->dp_nticks = 0;
 }
 
-void
+int
 dt_prov_profile_enter(struct dt_provider *dtpv, ...)
 {
        struct cpu_info *ci = curcpu();
@@ -130,9 +132,10 @@ dt_prov_profile_enter(struct dt_provider *dtpv, ...)
                dt_prov_profile_fire(dp);
        }
        smr_read_leave();
+       return 0;
 }
 
-void
+int
 dt_prov_interval_enter(struct dt_provider *dtpv, ...)
 {
        struct dt_pcb *dp;
@@ -144,4 +147,5 @@ dt_prov_interval_enter(struct dt_provider *dtpv, ...)
                dt_prov_profile_fire(dp);
        }
        smr_read_leave();
+       return 0;
 }
diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c
index d7e10542a0c..08d12940236 100644
--- a/sys/dev/dt/dt_prov_static.c
+++ b/sys/dev/dt/dt_prov_static.c
@@ -25,12 +25,13 @@
 
 int    dt_prov_static_alloc(struct dt_probe *, struct dt_softc *,
            struct dt_pcb_list *, struct dtioc_req *);
-void   dt_prov_static_hook(struct dt_provider *, ...);
+int    dt_prov_static_hook(struct dt_provider *, ...);
 
 struct dt_provider dt_prov_static = {
        .dtpv_name = "tracepoint",
        .dtpv_alloc = dt_prov_static_alloc,
        .dtpv_enter = dt_prov_static_hook,
+    .dtpv_desalloc = NULL,
 };
 
 /*
@@ -120,7 +121,7 @@ dt_prov_static_alloc(struct dt_probe *dtp, struct dt_softc 
*sc,
        return 0;
 }
 
-void
+int
 dt_prov_static_hook(struct dt_provider *dtpv, ...)
 {
        struct dt_probe *dtp;
@@ -146,13 +147,14 @@ dt_prov_static_hook(struct dt_provider *dtpv, ...)
                if (dtev == NULL)
                        continue;
 
-               dtev->dtev_sysargs[0] = args[0];
-               dtev->dtev_sysargs[1] = args[1];
-               dtev->dtev_sysargs[2] = args[2];
-               dtev->dtev_sysargs[3] = args[3];
-               dtev->dtev_sysargs[4] = args[4];
+               dtev->dtev_args[0] = args[0];
+               dtev->dtev_args[1] = args[1];
+               dtev->dtev_args[2] = args[2];
+               dtev->dtev_args[3] = args[3];
+               dtev->dtev_args[4] = args[4];
 
                dt_pcb_ring_consume(dp, dtev);
        }
        smr_read_leave();
+       return 1;
 }
diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c
index b2e82551929..0c1c23b290a 100644
--- a/sys/dev/dt/dt_prov_syscall.c
+++ b/sys/dev/dt/dt_prov_syscall.c
@@ -37,7 +37,7 @@ unsigned int    dtps_nsysent = SYS_MAXSYSCALL;
 
 int    dt_prov_syscall_alloc(struct dt_probe *, struct dt_softc *,
            struct dt_pcb_list *, struct dtioc_req *);
-void   dt_prov_syscall_entry(struct dt_provider *, ...);
+int    dt_prov_syscall_entry(struct dt_provider *, ...);
 void   dt_prov_syscall_return(struct dt_provider *, ...);
 
 struct dt_provider dt_prov_syscall = {
@@ -45,6 +45,7 @@ struct dt_provider dt_prov_syscall = {
        .dtpv_alloc     = dt_prov_syscall_alloc,
        .dtpv_enter     = dt_prov_syscall_entry,
        .dtpv_leave     = dt_prov_syscall_return,
+       .dtpv_desalloc = NULL,
 };
 
 int
@@ -119,7 +120,7 @@ dt_prov_syscall_alloc(struct dt_probe *dtp, struct dt_softc 
*sc,
        return 0;
 }
 
-void
+int
 dt_prov_syscall_entry(struct dt_provider *dtpv, ...)
 {
        struct dt_probe *dtp;
@@ -136,14 +137,14 @@ dt_prov_syscall_entry(struct dt_provider *dtpv, ...)
        args = va_arg(ap, register_t*);
        va_end(ap);
 
-       KASSERT((argsize / sizeof(register_t)) <= DTMAXSYSARGS);
+       KASSERT((argsize / sizeof(register_t)) <= DTMAXFUNCARGS);
 
        if (sysnum < 0 || sysnum >= dtps_nsysent)
-               return;
+               return 0;
 
        dtp = dtps_entry[sysnum];
        if (!dtp->dtp_recording)
-               return;
+               return 0;
 
        smr_read_enter();
        SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
@@ -154,11 +155,12 @@ dt_prov_syscall_entry(struct dt_provider *dtpv, ...)
                        continue;
 
                if (ISSET(dp->dp_evtflags, DTEVT_FUNCARGS))
-                       memcpy(dtev->dtev_sysargs, args, argsize);
+                       memcpy(dtev->dtev_args, args, argsize);
 
                dt_pcb_ring_consume(dp, dtev);
        }
        smr_read_leave();
+       return 0;
 }
 
 void
@@ -196,13 +198,13 @@ dt_prov_syscall_return(struct dt_provider *dtpv, ...)
                        continue;
 
                if (error) {
-                       dtev->dtev_sysretval[0] = -1;
-                       dtev->dtev_sysretval[1] = 0;
-                       dtev->dtev_syserror = error;
+                       dtev->dtev_retval[0] = -1;
+                       dtev->dtev_retval[1] = 0;
+                       dtev->dtev_error = error;
                } else {
-                       dtev->dtev_sysretval[0] = retval[0];
-                       dtev->dtev_sysretval[1] = retval[1];
-                       dtev->dtev_syserror = 0;
+                       dtev->dtev_retval[0] = retval[0];
+                       dtev->dtev_retval[1] = retval[1];
+                       dtev->dtev_error = 0;
                }
 
                dt_pcb_ring_consume(dp, dtev);
diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h
index 3e7985692eb..6d2dd1a8110 100644
--- a/sys/dev/dt/dtvar.h
+++ b/sys/dev/dt/dtvar.h
@@ -34,9 +34,9 @@
 #define DTMAXCOMLEN    16
 
 /*
- * Maximum number of arguments passed to a syscall.
+ * Maximum number of arguments passed to a function.
  */
-#define DTMAXSYSARGS   10
+#define DTMAXFUNCARGS  10
 
 /*
  * Event state: where to store information when a probe fires.
@@ -54,15 +54,15 @@ struct dt_evt {
        struct stacktrace       dtev_kstack;    /* kernel stack frame */
        char                    dtev_comm[DTMAXCOMLEN+1]; /* current pr. name */
        union {
-               register_t              E_entry[DTMAXSYSARGS];
+               register_t              E_entry[DTMAXFUNCARGS];
                struct {
                        register_t              __retval[2];
                        int                     __error;
                } E_return;
-       } _sys;
-#define dtev_sysargs   _sys.E_entry            /* syscall args. */
-#define dtev_sysretval _sys.E_return.__retval  /* syscall retval */
-#define dtev_syserror  _sys.E_return.__error   /* syscall error */
+       } _args;
+#define dtev_args      _args.E_entry               /* function args. */
+#define dtev_retval    _args.E_return.__retval /* function retval */
+#define dtev_error     _args.E_return.__error  /* function error */
 
 };
 
@@ -132,6 +132,7 @@ struct dtioc_stat {
 
 #define DTIOCRECORD    _IOW('D', 3, int)
 #define DTIOCPRBENABLE _IOW('D', 4, struct dtioc_req)
+#define DTIOCPRBDISABLE         _IOW('D', 5, struct dtioc_req)
 
 
 #ifdef _KERNEL
@@ -213,12 +214,16 @@ struct dt_probe {
        const char              *dtp_func;      /* [I] probe function */
        const char              *dtp_name;      /* [I] probe name */
        uint32_t                 dtp_pbn;       /* [I] unique ID */
-       volatile uint32_t        dtp_recording; /* [D] is it recording? */
-       uint8_t                  dtp_nargs;     /* [I] # of arguments */
+       volatile uint32_t        dtp_recording; /* [d] is it recording? */
+
+    struct mutex     dtp_mtx;
+    unsigned         dtp_ref; /* [m] number of pcb referencing this probe*/
 
        /* Provider specific fields. */
-       int                      dtp_sysnum;    /* [I] related # of syscall */
-       const char              *dtp_argtype[5];/* [I] type of arguments */
+       int                     dtp_sysnum;     /* [I] related # of syscall */
+       const char      *dtp_argtype[5];/* [I] type of arguments */
+       int                 dtp_nargs;  /* [I] # of arguments */
+       vaddr_t     dtp_addr; /* [I] address of breakpint */
 };
 
 
@@ -231,13 +236,18 @@ struct dt_provider {
 
        int             (*dtpv_alloc)(struct dt_probe *, struct dt_softc *,
                            struct dt_pcb_list *, struct dtioc_req *);
-       void            (*dtpv_enter)(struct dt_provider *, ...);
+       int             (*dtpv_enter)(struct dt_provider *, ...);
        void            (*dtpv_leave)(struct dt_provider *, ...);
+       int             (*dtpv_desalloc)(struct dt_probe *, struct dt_softc *,
+                           struct dtioc_req *);
 };
 
+extern struct dt_provider dt_prov_kprobe;
+
 int             dt_prov_profile_init(void);
 int             dt_prov_syscall_init(void);
 int             dt_prov_static_init(void);
+int             dt_prov_kprobe_init(void);
 
 struct dt_probe *dt_dev_alloc_probe(const char *, const char *,
                    struct dt_provider *);
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
index 1de0726ab95..3f8d59d7832 100644
--- a/sys/kern/subr_prof.c
+++ b/sys/kern/subr_prof.c
@@ -70,10 +70,6 @@ prof_init(void)
        char *cp;
        int size;
 
-#if !defined(GPROF) && defined(DDBPROF)
-       db_prof_init();
-#endif
-
        /*
         * Round lowpc and highpc to multiples of the density we're using
         * so the rest of the scaling (here and in gprof) stays in ints.
diff --git a/usr.sbin/btrace/btrace.c b/usr.sbin/btrace/btrace.c
index 7b77ceb1366..fabbb4f78c0 100644
--- a/usr.sbin/btrace/btrace.c
+++ b/usr.sbin/btrace/btrace.c
@@ -498,19 +498,24 @@ rules_teardown(int fd)
        struct bt_rule *r, *rend = NULL;
        int dokstack = 0, off = 0;
 
+
        if (g_nprobes > 0) {
                if (ioctl(fd, DTIOCRECORD, &off))
                        err(1, "DTIOCRECORD");
        }
 
+
        TAILQ_FOREACH(r, &g_rules, br_next) {
+           dtrq = r->br_cookie;
                if (r->br_type != B_RT_PROBE) {
                        if (r->br_type == B_RT_END)
                                rend = r;
                        continue;
-               }
+        } else {
+            if (ioctl(fd, DTIOCPRBDISABLE, dtrq))
+                err(1, "DTIOCPRBDISABLE");
+        }
 
-               dtrq = r->br_cookie;
                if (dtrq->dtrq_evtflags & DTEVT_KSTACK)
                        dokstack = 1;
        }
@@ -657,8 +662,7 @@ builtin_arg(struct dt_evt *dtev, enum bt_argtype dat)
        static char buf[sizeof("18446744073709551615")]; /* UINT64_MAX */
 
        snprintf(buf, sizeof(buf) - 1, "%lu",
-           dtev->dtev_sysargs[dat - B_AT_BI_ARG0]);
-
+            dtev->dtev_args[dat - B_AT_BI_ARG0]);
        return buf;
 }
 
@@ -1088,7 +1092,7 @@ ba2str(struct bt_arg *ba, struct dt_evt *dtev)
                str = builtin_arg(dtev, ba->ba_type);
                break;
        case B_AT_BI_RETVAL:
-               snprintf(buf, sizeof(buf) - 1, "%ld", 
(long)dtev->dtev_sysretval[0]);
+               snprintf(buf, sizeof(buf) - 1, "%ld", 
(long)dtev->dtev_retval[0]);
                str = buf;
                break;
        case B_AT_MAP:

Reply via email to