This is the next patch in the clock interrupt reorganization series.

This patch moves the entry points for the interval and profile dt(4)
providers from the hardclock(9) to a dedicated clock interrupt
callback, dt_prov_profile_intr(), in dev/dt/dt_prov_profile.c.

- To preserve current behavior, (1) both provider entrypoints have
  been moved into a single callback function, (2) the interrupt runs at
  the same frequency as the hardclock, and (3) the interrupt is
  staggered to co-occur with the hardclock on a given CPU.

  All of this can be changed later.  This patch is strictly
  logistical: moving these parts out of the hardclock.

- Each dt_pcb gets a provider-specific "dp_clockintr" pointer.  If the
  PCB's implementing provider needs a clock interrupt to do its work, it
  stores the handle in dp_clockintr.  The handle, if any, is established
  during dtpv_alloc() and disestablished during dtpv_dealloc().

  Only the interval and profile providers use it at present.

- In order to implement dt_prov_profile_dealloc() I needed a complete
  definition of struct dt_softc, so I hoisted it up out of dt_dev.c
  into dtvar.h.

- A PCB's periodic clock interrupt, if any, is started during
  dt_ioctl_record_start() stopped during dt_ioctl_record_stop().
  This is not a provider-agnostic approach, but I don't see where
  else to start/stop the clock interrupt.

  One alternative is to start running the clock interrupts when they
  are allocated in dtpv_alloc() and stop them when they are freed in
  dtpv_dealloc().  This is wasteful, though: the PCBs are not recording
  yet, so the interrupts won't perform any useful work until the
  controlling process enables recording.

  An additional pair of provider hooks, e.g. "dtpv_record_start" and
  "dtpv_record_stop", might resolve this.

- We haven't needed to destroy clock interrupts yet, so the
  clockintr_disestablish() function used in this patch is new.

  The implementation is extremely similar to that of clockintr_cancel().
  However, for sake of simplicity, a callback may not disestablish its
  clockintr while the callback is running.

  One tricky part: if a clockintr is disestablished while it is running,
  the dispatch thread takes care not to use-after-free when it re-enters
  the clockintr_queue mutex.

I have tested this on amd64.  It seems to work: the clock interrupts
fire, stack traces are recorded, and the handles are deallocated when
the process closes the file descriptor.

I will be testing it on other platforms over the next few days.

Thoughts?  Test results?

Index: kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.32
diff -u -p -r1.32 kern_clockintr.c
--- kern/kern_clockintr.c       21 Aug 2023 17:22:04 -0000      1.32
+++ kern/kern_clockintr.c       23 Aug 2023 23:35:11 -0000
@@ -218,7 +218,7 @@ clockintr_dispatch(void *frame)
 {
        uint64_t lateness, run = 0, start;
        struct cpu_info *ci = curcpu();
-       struct clockintr *cl;
+       struct clockintr *cl, *shadow;
        struct clockintr_queue *cq = &ci->ci_queue;
        u_int ogen;
 
@@ -257,22 +257,26 @@ clockintr_dispatch(void *frame)
                                break;
                }
                clockintr_cancel_locked(cl);
-               cq->cq_shadow.cl_expiration = cl->cl_expiration;
+               shadow = &cq->cq_shadow;
+               shadow->cl_expiration = cl->cl_expiration;
+               shadow->cl_func = cl->cl_func;
                cq->cq_running = cl;
                mtx_leave(&cq->cq_mtx);
 
-               cl->cl_func(&cq->cq_shadow, frame);
+               shadow->cl_func(shadow, frame);
 
                mtx_enter(&cq->cq_mtx);
-               cq->cq_running = NULL;
-               if (ISSET(cl->cl_flags, CLST_IGNORE_SHADOW)) {
-                       CLR(cl->cl_flags, CLST_IGNORE_SHADOW);
-                       CLR(cq->cq_shadow.cl_flags, CLST_SHADOW_PENDING);
-               }
-               if (ISSET(cq->cq_shadow.cl_flags, CLST_SHADOW_PENDING)) {
-                       CLR(cq->cq_shadow.cl_flags, CLST_SHADOW_PENDING);
-                       clockintr_schedule_locked(cl,
-                           cq->cq_shadow.cl_expiration);
+               if (cq->cq_running != NULL) {
+                       cq->cq_running = NULL;
+                       if (ISSET(cl->cl_flags, CLST_IGNORE_SHADOW)) {
+                               CLR(cl->cl_flags, CLST_IGNORE_SHADOW);
+                               CLR(shadow->cl_flags, CLST_SHADOW_PENDING);
+                       }
+                       if (ISSET(shadow->cl_flags, CLST_SHADOW_PENDING)) {
+                               CLR(shadow->cl_flags, CLST_SHADOW_PENDING);
+                               clockintr_schedule_locked(cl,
+                                   shadow->cl_expiration);
+                       }
                }
                run++;
        }
@@ -382,6 +386,34 @@ clockintr_cancel_locked(struct clockintr
 
        TAILQ_REMOVE(&cq->cq_pend, cl, cl_plink);
        CLR(cl->cl_flags, CLST_PENDING);
+}
+
+void
+clockintr_disestablish(struct clockintr *cl)
+{
+       struct clockintr_queue *cq = cl->cl_queue;
+       int was_next;
+
+       if (cl == &cq->cq_shadow)
+               panic("%s: called during dispatch\n", __func__);
+
+       mtx_enter(&cq->cq_mtx);
+       if (ISSET(cl->cl_flags, CLST_PENDING)) {
+               was_next = cl == TAILQ_FIRST(&cq->cq_pend);
+               clockintr_cancel_locked(cl);
+               if (ISSET(cq->cq_flags, CQ_INTRCLOCK)) {
+                       if (was_next && !TAILQ_EMPTY(&cq->cq_pend)) {
+                               if (cq == &curcpu()->ci_queue)
+                                       clockqueue_reset_intrclock(cq);
+                       }
+               }
+       }
+       if (cl == cq->cq_running)
+               cq->cq_running = NULL;
+       TAILQ_REMOVE(&cq->cq_est, cl, cl_elink);
+       mtx_leave(&cq->cq_mtx);
+
+       free(cl, M_DEVBUF, sizeof *cl);
 }
 
 struct clockintr *
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.115
diff -u -p -r1.115 kern_clock.c
--- kern/kern_clock.c   23 Aug 2023 01:55:45 -0000      1.115
+++ kern/kern_clock.c   23 Aug 2023 23:35:11 -0000
@@ -49,11 +49,6 @@
 #include <sys/sched.h>
 #include <sys/timetc.h>
 
-#include "dt.h"
-#if NDT > 0
-#include <dev/dt/dtvar.h>
-#endif
-
 /*
  * Clock handling routines.
  *
@@ -114,12 +109,6 @@ initclocks(void)
 void
 hardclock(struct clockframe *frame)
 {
-#if NDT > 0
-       DT_ENTER(profile, NULL);
-       if (CPU_IS_PRIMARY(curcpu()))
-               DT_ENTER(interval, NULL);
-#endif
-
        /*
         * If we are not the primary CPU, we're not allowed to do
         * any more work.
Index: sys/clockintr.h
===================================================================
RCS file: /cvs/src/sys/sys/clockintr.h,v
retrieving revision 1.10
diff -u -p -r1.10 clockintr.h
--- sys/clockintr.h     21 Aug 2023 17:22:04 -0000      1.10
+++ sys/clockintr.h     23 Aug 2023 23:35:11 -0000
@@ -128,6 +128,7 @@ void clockintr_trigger(void);
 
 uint64_t clockintr_advance(struct clockintr *, uint64_t);
 void clockintr_cancel(struct clockintr *);
+void clockintr_disestablish(struct clockintr *);
 struct clockintr *clockintr_establish(struct clockintr_queue *,
     void (*)(struct clockintr *, void *));
 void clockintr_stagger(struct clockintr *, uint64_t, u_int, u_int);
Index: dev/dt/dt_prov_profile.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_profile.c,v
retrieving revision 1.5
diff -u -p -r1.5 dt_prov_profile.c
--- dev/dt/dt_prov_profile.c    26 Apr 2023 16:53:59 -0000      1.5
+++ dev/dt/dt_prov_profile.c    23 Aug 2023 23:35:13 -0000
@@ -20,6 +20,7 @@
 #include <sys/systm.h>
 #include <sys/param.h>
 #include <sys/atomic.h>
+#include <sys/clockintr.h>
 
 #include <dev/dt/dtvar.h>
 
@@ -31,6 +32,8 @@ struct dt_probe       *dtpp_interval;         /* glob
 
 int    dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
            struct dt_pcb_list *, struct dtioc_req *);
+int    dt_prov_profile_dealloc(struct dt_probe *, struct dt_softc *,
+           struct dtioc_req *);
 int    dt_prov_profile_enter(struct dt_provider *, ...);
 int    dt_prov_interval_enter(struct dt_provider *, ...);
 
@@ -39,7 +42,7 @@ struct dt_provider dt_prov_profile = {
        .dtpv_alloc     = dt_prov_profile_alloc,
        .dtpv_enter     = dt_prov_profile_enter,
        .dtpv_leave     = NULL,
-       .dtpv_dealloc   = NULL,
+       .dtpv_dealloc   = dt_prov_profile_dealloc,
 };
 
 struct dt_provider dt_prov_interval = {
@@ -47,9 +50,11 @@ struct dt_provider dt_prov_interval = {
        .dtpv_alloc     = dt_prov_profile_alloc,
        .dtpv_enter     = dt_prov_interval_enter,
        .dtpv_leave     = NULL,
-       .dtpv_dealloc   = NULL,
+       .dtpv_dealloc   = dt_prov_profile_dealloc,
 };
 
+void dt_prov_profile_intr(struct clockintr *, void *);
+
 int
 dt_prov_profile_init(void)
 {
@@ -85,10 +90,17 @@ dt_prov_profile_alloc(struct dt_probe *d
                        continue;
 
                dp = dt_pcb_alloc(dtp, sc);
-               if (dp == NULL) {
-                       dt_pcb_purge(plist);
-                       return ENOMEM;
+               if (dp == NULL)
+                       goto purge;
+
+               dp->dp_clockintr = clockintr_establish(&ci->ci_queue,
+                   dt_prov_profile_intr);
+               if (dp->dp_clockintr == NULL) {
+                       dt_pcb_free(dp);
+                       goto purge;
                }
+               clockintr_stagger(dp->dp_clockintr, hardclock_period,
+                   CPU_INFO_UNIT(ci), MAXCPUS);
 
                dp->dp_maxtick = hz / dtrq->dtrq_rate;
                dp->dp_cpuid = ci->ci_cpuid;
@@ -99,6 +111,31 @@ dt_prov_profile_alloc(struct dt_probe *d
        }
 
        return 0;
+purge:
+       TAILQ_FOREACH(dp, plist, dp_snext)
+               clockintr_disestablish(dp->dp_clockintr);
+       dt_pcb_purge(plist);
+       return ENOMEM;
+}
+
+int
+dt_prov_profile_dealloc(struct dt_probe *dtp, struct dt_softc *sc,
+    struct dtioc_req *dtrq)
+{
+       struct dt_pcb *dp;
+
+       KASSERT(dtioc_req_isvalid(dtrq));
+       KASSERT(dtp == dtpp_profile || dtp == dtpp_interval);
+
+       TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
+               if (dp->dp_dtp == dtp) {
+                       if (dp->dp_clockintr != NULL) {
+                               clockintr_disestablish(dp->dp_clockintr);
+                               dp->dp_clockintr = NULL;
+                       }
+               }
+       }
+       return 0;
 }
 
 static inline void
@@ -148,4 +185,18 @@ dt_prov_interval_enter(struct dt_provide
        }
        smr_read_leave();
        return 0;
+}
+
+void
+dt_prov_profile_intr(struct clockintr *cl, void *cf)
+{
+       uint64_t count, i;
+       struct cpu_info *ci = curcpu();
+
+       count = clockintr_advance(cl, hardclock_period);
+       for (i = 0; i < count; i++) {
+               DT_ENTER(profile, NULL);
+               if (CPU_IS_PRIMARY(ci))
+                       DT_ENTER(interval, NULL);
+       }
 }
Index: dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
retrieving revision 1.28
diff -u -p -r1.28 dt_dev.c
--- dev/dt/dt_dev.c     14 Jul 2023 07:07:08 -0000      1.28
+++ dev/dt/dt_dev.c     23 Aug 2023 23:35:14 -0000
@@ -19,6 +19,7 @@
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/param.h>
+#include <sys/clockintr.h>
 #include <sys/device.h>
 #include <sys/exec_elf.h>
 #include <sys/malloc.h>
@@ -82,32 +83,6 @@
 
 #define DPRINTF(x...) /* nothing */
 
-/*
- * Descriptor associated with each program opening /dev/dt.  It is used
- * to keep track of enabled PCBs.
- *
- *  Locks used to protect struct members in this file:
- *     m       per-softc mutex
- *     K       kernel lock
- */
-struct dt_softc {
-       SLIST_ENTRY(dt_softc)    ds_next;       /* [K] descriptor list */
-       int                      ds_unit;       /* [I] D_CLONE unique unit */
-       pid_t                    ds_pid;        /* [I] PID of tracing program */
-
-       struct mutex             ds_mtx;
-
-       struct dt_pcb_list       ds_pcbs;       /* [K] list of enabled PCBs */
-       struct dt_evt           *ds_bufqueue;   /* [K] copy evts to userland */
-       size_t                   ds_bufqlen;    /* [K] length of the queue */
-       int                      ds_recording;  /* [K] currently recording? */
-       int                      ds_evtcnt;     /* [m] # of readable evts */
-
-       /* Counters */
-       uint64_t                 ds_readevt;    /* [m] # of events read */
-       uint64_t                 ds_dropevt;    /* [m] # of events dropped */
-};
-
 SLIST_HEAD(, dt_softc) dtdev_list;     /* [K] list of open /dev/dt nodes */
 
 /*
@@ -486,6 +461,14 @@ dt_ioctl_record_start(struct dt_softc *s
                SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
                dtp->dtp_recording++;
                dtp->dtp_prov->dtpv_recording++;
+
+               /*
+                * XXX Not provider-agnostic.  Is there a better place
+                * to do this?
+                */
+               if (dp->dp_clockintr != NULL)
+                       clockintr_advance(dp->dp_clockintr, hardclock_period);
+               
        }
        rw_exit_write(&dt_lock);
 
@@ -514,6 +497,11 @@ dt_ioctl_record_stop(struct dt_softc *sc
 
                dtp->dtp_recording--;
                dtp->dtp_prov->dtpv_recording--;
+
+               /* XXX This is not provider-agnostic. */
+               if (dp->dp_clockintr != NULL)
+                       clockintr_cancel(dp->dp_clockintr);
+
                SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
        }
        rw_exit_write(&dt_lock);
Index: dev/dt/dtvar.h
===================================================================
RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
retrieving revision 1.17
diff -u -p -r1.17 dtvar.h
--- dev/dt/dtvar.h      26 Apr 2023 16:53:59 -0000      1.17
+++ dev/dt/dtvar.h      23 Aug 2023 23:35:14 -0000
@@ -158,12 +158,39 @@ struct dtioc_getaux {
 #include <sys/queue.h>
 #include <sys/smr.h>
 
+struct dt_pcb;
+TAILQ_HEAD(dt_pcb_list, dt_pcb);
+
 /* Flags that make sense for all providers. */
 #define DTEVT_COMMON   (DTEVT_EXECNAME|DTEVT_KSTACK|DTEVT_USTACK)
 
 #define M_DT M_DEVBUF /* XXX FIXME */
 
-struct dt_softc;
+/*
+ * Descriptor associated with each program opening /dev/dt.  It is used
+ * to keep track of enabled PCBs.
+ *
+ *  Locks used to protect struct members in this file:
+ *     m       per-softc mutex
+ *     K       kernel lock
+ */
+struct dt_softc {
+       SLIST_ENTRY(dt_softc)    ds_next;       /* [K] descriptor list */
+       int                      ds_unit;       /* [I] D_CLONE unique unit */
+       pid_t                    ds_pid;        /* [I] PID of tracing program */
+
+       struct mutex             ds_mtx;
+
+       struct dt_pcb_list       ds_pcbs;       /* [K] list of enabled PCBs */
+       struct dt_evt           *ds_bufqueue;   /* [K] copy evts to userland */
+       size_t                   ds_bufqlen;    /* [K] length of the queue */
+       int                      ds_recording;  /* [K] currently recording? */
+       int                      ds_evtcnt;     /* [m] # of readable evts */
+
+       /* Counters */
+       uint64_t                 ds_readevt;    /* [m] # of events read */
+       uint64_t                 ds_dropevt;    /* [m] # of events dropped */
+};
 
 int            dtioc_req_isvalid(struct dtioc_req *);
 
@@ -197,6 +224,7 @@ struct dt_pcb {
        struct dt_filter         dp_filter;     /* [I] filter to match */
 
        /* Provider specific fields. */
+       struct clockintr        *dp_clockintr;  /* [?] clock interrupt handle */
        unsigned int             dp_cpuid;      /* [I] on which CPU */
        unsigned int             dp_maxtick;    /* [I] freq. of profiling */
        unsigned int             dp_nticks;     /* [c] current tick count */
@@ -204,8 +232,6 @@ struct dt_pcb {
        /* Counters */
        uint64_t                 dp_dropevt;    /* [m] # dropped event */
 };
-
-TAILQ_HEAD(dt_pcb_list, dt_pcb);
 
 struct dt_pcb  *dt_pcb_alloc(struct dt_probe *, struct dt_softc *);
 void            dt_pcb_free(struct dt_pcb *);

Reply via email to