From: Wen Yang <[email protected]>

Consolidate per-object DA monitor storage allocation under a
single compile-time selector, replacing the ad-hoc
da_monitor_init_prealloc() API.

Three strategies are provided:

  DA_ALLOC_AUTO   (default) - lock-free kmalloc_nolock on the hot path;
                              unbounded capacity.  Preserves the existing
                              behaviour for all monitors that do not set
                              DA_MON_ALLOCATION_STRATEGY.

  DA_ALLOC_POOL             - pre-allocated fixed-size pool.  Requires the
                              monitor to define DA_MON_POOL_SIZE; enforced
                              with #error.  da_prepare_storage() acquires
                              spinlock_t (O(1), irqsave); must be called
                              from task context on PREEMPT_RT where
                              spinlock_t is a sleeping lock.

  DA_ALLOC_MANUAL           - caller pre-inserts storage via
                              da_create_empty_storage() before the first
                              da_handle_start_event(); the framework only
                              links the target field.  Useful for monitors
                              that allocate storage from known-safe task
                              context (e.g. a syscall path) and then hand
                              it to a tracepoint handler on the hot path.

da_handle_start_event() and da_handle_start_run_event() both call
da_prepare_storage() which resolves at compile time to the correct
allocation function, so no runtime dispatch is needed.

da_monitor_init_prealloc() is removed; da_monitor_init() selects pool
or kmalloc initialisation internally based on the strategy.

A da_extra_cleanup() hook macro is added: the default is a no-op; a
monitor may define it as a function called by da_monitor_destroy() on
each remaining entry before hash_del_rcu().

nomiss is updated to DA_ALLOC_MANUAL: it calls da_create_empty_storage()
from handle_sys_enter() (the sched_setscheduler syscall path, safe
task context), then da_fill_empty_storage() links the sched_dl_entity
target on the first da_handle_start_run_event() call in
handle_sched_switch().

Suggested-by: Gabriele Monaco <[email protected]>
Signed-off-by: Wen Yang <[email protected]>
---
 include/rv/da_monitor.h                  | 276 +++++++++++++++++++++--
 kernel/trace/rv/monitors/nomiss/nomiss.c |   6 +-
 2 files changed, 254 insertions(+), 28 deletions(-)

diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h
index 34b8fba9ecd4..eb7fc02ecb8a 100644
--- a/include/rv/da_monitor.h
+++ b/include/rv/da_monitor.h
@@ -14,6 +14,26 @@
 #ifndef _RV_DA_MONITOR_H
 #define _RV_DA_MONITOR_H
 
+/*
+ * Allocation strategies for RV_MON_PER_OBJ monitors.
+ *
+ * Define DA_MON_ALLOCATION_STRATEGY before including this header.
+ * DA_ALLOC_AUTO   - lock-free kmalloc on the hot path; unbounded capacity.
+ * DA_ALLOC_POOL   - pre-allocated fixed-size pool; requires DA_MON_POOL_SIZE.
+ *                   da_prepare_storage() acquires spinlock_t (O(1), irqsave);
+ *                   must be called from task context on PREEMPT_RT where
+ *                   spinlock_t is a sleeping lock.
+ * DA_ALLOC_MANUAL - caller inserts storage before da_handle_start_event();
+ *                   the framework only links the target field.
+ */
+#define DA_ALLOC_AUTO   0
+#define DA_ALLOC_POOL   1
+#define DA_ALLOC_MANUAL 2
+
+#ifndef DA_MON_ALLOCATION_STRATEGY
+# define DA_MON_ALLOCATION_STRATEGY DA_ALLOC_AUTO
+#endif
+
 #include <rv/automata.h>
 #include <linux/rv.h>
 #include <linux/stringify.h>
@@ -66,6 +86,19 @@ static struct rv_monitor rv_this;
 #define da_monitor_sync_hook()
 #endif
 
+/*
+ * Hook for per-object teardown during da_monitor_destroy().
+ *
+ * Called for each entry still in the hash table when the monitor is
+ * destroyed.  Invoked before da_monitor_reset() and hash_del_rcu(), so
+ * it is safe to call ha_cancel_timer_sync() here.
+ *
+ * Define before including this header.  Default is a no-op.
+ */
+#ifndef da_extra_cleanup
+#define da_extra_cleanup(da_mon)
+#endif
+
 /*
  * Type for the target id, default to int but can be overridden.
  * A long type can work as hash table key (PER_OBJ) but will be downgraded to
@@ -398,6 +431,16 @@ static inline void da_monitor_destroy(void)
  * Functions to define, init and get a per-object monitor.
  */
 
+/*
+ * DA_MON_POOL_SIZE must be defined before this header is included (directly or
+ * transitively via ha_monitor.h) when DA_ALLOC_POOL is selected.  In practice
+ * this means defining it after the monitor's model header (which supplies the
+ * capacity constant) and before the ha_monitor.h include.
+ */
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL && !defined(DA_MON_POOL_SIZE)
+# error "DA_ALLOC_POOL requires DA_MON_POOL_SIZE to be defined before 
including this header"
+#endif
+
 struct da_monitor_storage {
        da_id_type id;
        monitor_target target;
@@ -495,18 +538,6 @@ static inline da_id_type da_get_id(struct da_monitor 
*da_mon)
        return container_of(da_mon, struct da_monitor_storage, rv.da_mon)->id;
 }
 
-/*
- * da_create_or_get - create the per-object storage if not already there
- *
- * This needs a lookup so should be guarded by RCU, the condition is checked
- * directly in da_create_storage()
- */
-static inline void da_create_or_get(da_id_type id, monitor_target target)
-{
-       guard(rcu)();
-       da_create_storage(id, target, da_get_monitor(id, target));
-}
-
 /*
  * da_fill_empty_storage - store the target in a pre-allocated storage
  *
@@ -537,15 +568,96 @@ static inline monitor_target 
da_get_target_by_id(da_id_type id)
        return mon_storage->target;
 }
 
+/*
+ * Per-object pool state.
+ *
+ * Zero-initialised, so storage stays NULL unless pool mode is used.  A monitor
+ * opts into pool mode by defining DA_MON_ALLOCATION_STRATEGY DA_ALLOC_POOL
+ * and DA_MON_POOL_SIZE before including this header; da_monitor_init() then
+ * pre-allocates the pool internally.
+ *
+ * Because every field is wrapped in this struct and the struct itself is a
+ * per-TU static, each monitor that includes this header gets a completely
+ * independent pool.  A kmalloc monitor (e.g. nomiss) and a pool monitor
+ * (e.g. tlob) therefore coexist without any interference.
+ *
+ * da_pool_return_cb runs from softirq (non-PREEMPT_RT) or rcuc kthread
+ * (PREEMPT_RT); spin_lock_irqsave handles both.
+ */
+struct da_per_obj_pool {
+       struct da_monitor_storage  *storage;  /* non-NULL ⟹ pool mode */
+       struct da_monitor_storage **free;     /* kmalloc'd pointer stack */
+       unsigned int                free_top;
+       unsigned int                capacity; /* total number of slots */
+       spinlock_t                  lock;
+};
+
+static struct da_per_obj_pool da_pool = {
+       .lock = __SPIN_LOCK_UNLOCKED(da_pool.lock),
+};
+
+static void da_pool_return_cb(struct rcu_head *head)
+{
+       struct da_monitor_storage *ms =
+               container_of(head, struct da_monitor_storage, rcu);
+       unsigned long flags;
+
+       spin_lock_irqsave(&da_pool.lock, flags);
+       if (!WARN_ON_ONCE(!da_pool.free || da_pool.free_top >= 
da_pool.capacity))
+               da_pool.free[da_pool.free_top++] = ms;
+       spin_unlock_irqrestore(&da_pool.lock, flags);
+}
+
+/*
+ * da_create_or_get_pool - pop a slot and insert it into the hash.
+ *
+ * Returns the new da_monitor on success, NULL if the pool is exhausted, or
+ * the existing da_monitor if a concurrent caller already inserted the same id
+ * (in which case the popped slot is returned to the free stack).
+ *
+ * Must be called inside an RCU read-side critical section (guard(rcu)()).
+ */
+static inline struct da_monitor *
+da_create_or_get_pool(da_id_type id, monitor_target target)
+{
+       struct da_monitor_storage *mon_storage, *existing;
+       unsigned long flags;
+
+       spin_lock_irqsave(&da_pool.lock, flags);
+       if (!da_pool.free_top) {
+               spin_unlock_irqrestore(&da_pool.lock, flags);
+               return NULL;
+       }
+       mon_storage = da_pool.free[--da_pool.free_top];
+       spin_unlock_irqrestore(&da_pool.lock, flags);
+
+       mon_storage->id = id;
+       mon_storage->target = target;
+
+       /*
+        * A concurrent caller may have inserted the same id between our 
spinlock
+        * release and here.  Return the slot to the pool and yield to the 
winner.
+        */
+       existing = __da_get_mon_storage(id);
+       if (unlikely(existing)) {
+               spin_lock_irqsave(&da_pool.lock, flags);
+               da_pool.free[da_pool.free_top++] = mon_storage;
+               spin_unlock_irqrestore(&da_pool.lock, flags);
+               return &existing->rv.da_mon;
+       }
+       hash_add_rcu(da_monitor_ht, &mon_storage->node, id);
+       return &mon_storage->rv.da_mon;
+}
+
+
 /*
  * da_destroy_storage - destroy the per-object storage
  *
- * The caller is responsible to synchronise writers, either with locks or
- * implicitly. For instance, if da_destroy_storage is called at sched_exit and
- * da_create_storage can never occur after that, it's safe to call this without
- * locks.
- * This function includes an RCU read-side critical section to synchronise
- * against da_monitor_destroy().
+ * Pool mode: removes from hash and returns the slot via call_rcu().
+ * Kmalloc mode: removes from hash and frees via kfree_rcu().
+ *
+ * Includes an RCU read-side critical section to synchronise against
+ * da_monitor_destroy().
  */
 static inline void da_destroy_storage(da_id_type id)
 {
@@ -558,7 +670,11 @@ static inline void da_destroy_storage(da_id_type id)
                return;
        da_monitor_reset_hook(&mon_storage->rv.da_mon);
        hash_del_rcu(&mon_storage->node);
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+       call_rcu(&mon_storage->rcu, da_pool_return_cb);
+#else
        kfree_rcu(mon_storage, rcu);
+#endif
 }
 
 static void __da_monitor_reset_all(void (*reset)(struct da_monitor *))
@@ -581,13 +697,87 @@ static inline void da_monitor_reset_state_all(void)
        __da_monitor_reset_all(da_monitor_reset_state);
 }
 
+/* Not part of the public API; called by da_monitor_init() for DA_ALLOC_POOL. 
*/
+static inline int __da_monitor_init_pool(unsigned int prealloc_count)
+{
+       da_pool.storage = kcalloc(prealloc_count, sizeof(*da_pool.storage),
+                                 GFP_KERNEL);
+       if (!da_pool.storage)
+               return -ENOMEM;
+
+       da_pool.free = kmalloc_array(prealloc_count, sizeof(*da_pool.free),
+                                    GFP_KERNEL);
+       if (!da_pool.free) {
+               kfree(da_pool.storage);
+               da_pool.storage = NULL;
+               return -ENOMEM;
+       }
+
+       da_pool.capacity = prealloc_count;
+       da_pool.free_top = 0;
+       for (unsigned int i = 0; i < prealloc_count; i++)
+               da_pool.free[da_pool.free_top++] = &da_pool.storage[i];
+       return 0;
+}
+
+/*
+ * da_monitor_init - initialise the per-object monitor
+ *
+ * Selects the allocation path at compile time based on 
DA_MON_ALLOCATION_STRATEGY:
+ *   DA_ALLOC_POOL   - pre-allocates DA_MON_POOL_SIZE storage slots.
+ *   DA_ALLOC_AUTO / DA_ALLOC_MANUAL - initialises the hash table only.
+ */
 static inline int da_monitor_init(void)
 {
        hash_init(da_monitor_ht);
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+       return __da_monitor_init_pool(DA_MON_POOL_SIZE);
+#else
        return 0;
+#endif
 }
 
-static inline void da_monitor_destroy(void)
+static inline void da_monitor_destroy_pool(void)
+{
+       struct da_monitor_storage *ms;
+       struct hlist_node *tmp;
+       int bkt;
+
+       /*
+        * Ensure all in-flight tracepoint handlers that may hold a raw pointer
+        * to a pool slot (e.g. tlob_stop_task after its RCU guard exits) have
+        * completed before we begin tearing down the pool.  Mirrors the same
+        * call in da_monitor_destroy_kmalloc().
+        */
+       tracepoint_synchronize_unregister();
+
+       /*
+        * Drain any entries that were not stopped before destroy (e.g.
+        * uprobe-started sessions whose stop probe never fired).  Call
+        * da_extra_cleanup() before hash_del_rcu() so the hook may safely
+        * call ha_cancel_timer_sync() while the monitor is still reachable.
+        */
+       hash_for_each_safe(da_monitor_ht, bkt, tmp, ms, node) {
+               da_extra_cleanup(&ms->rv.da_mon);
+               hash_del_rcu(&ms->node);
+               call_rcu(&ms->rcu, da_pool_return_cb);
+       }
+
+       /*
+        * rcu_barrier() drains every pending call_rcu() callback, including
+        * both da_pool_return_cb() and any monitor-specific free callbacks
+        * (e.g. tlob_free_rcu) enqueued by da_extra_cleanup().
+        */
+       rcu_barrier();
+       kfree(da_pool.storage);
+       da_pool.storage = NULL;
+       kfree(da_pool.free);
+       da_pool.free = NULL;
+       da_pool.free_top = 0;
+       da_pool.capacity = 0;
+}
+
+static inline void da_monitor_destroy_kmalloc(void)
 {
        struct da_monitor_storage *mon_storage;
        struct hlist_node *tmp;
@@ -607,15 +797,51 @@ static inline void da_monitor_destroy(void)
 }
 
 /*
- * Allow the per-object monitors to run allocation manually, necessary if the
- * start condition is in a context problematic for allocation (e.g. 
scheduling).
- * In such case, if the storage was pre-allocated without a target, set it now.
+ * da_monitor_destroy - tear down the per-object monitor
+ *
+ * DA_ALLOC_POOL: calls tracepoint_synchronize_unregister() to drain any
+ * in-flight handlers, then iterates the hash draining remaining entries via
+ * da_extra_cleanup() + hash_del_rcu() + call_rcu(), then rcu_barrier() to
+ * wait for all pending da_pool_return_cb() callbacks before freeing the pool.
+ * DA_ALLOC_AUTO / DA_ALLOC_MANUAL: drains remaining entries after
+ * tracepoint_synchronize_unregister() + synchronize_rcu().
  */
-#ifdef DA_SKIP_AUTO_ALLOC
-#define da_prepare_storage da_fill_empty_storage
+static inline void da_monitor_destroy(void)
+{
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+       da_monitor_destroy_pool();
 #else
+       da_monitor_destroy_kmalloc();
+#endif
+}
+
+/*
+ * da_prepare_storage - obtain (or create) the da_monitor for (id, target)
+ *
+ * The implementation is selected at compile time by 
DA_MON_ALLOCATION_STRATEGY:
+ *
+ * DA_ALLOC_AUTO   - calls da_create_storage() (lock-free kmalloc_nolock).
+ * DA_ALLOC_POOL   - if an entry already exists, returns it; otherwise pops a
+ *                   slot from the pre-allocated pool and inserts it in the hash.
+ *                   Returns NULL if the pool is exhausted.
+ * DA_ALLOC_MANUAL - caller has already inserted storage via 
da_create_empty_storage();
+ *                   only fills in the target field if it was left NULL.
+ */
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+static inline struct da_monitor *da_prepare_storage(da_id_type id,
+                                                    monitor_target target,
+                                                    struct da_monitor *da_mon)
+{
+       if (da_mon)
+               return da_mon;
+       /* da_create_or_get_pool() returns the da_monitor directly; no 
re-lookup needed. */
+       return da_create_or_get_pool(id, target);
+}
+#elif DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_MANUAL
+#define da_prepare_storage da_fill_empty_storage
+#else /* DA_ALLOC_AUTO */
 #define da_prepare_storage da_create_storage
-#endif /* DA_SKIP_AUTO_ALLOC */
+#endif
 
 #endif /* RV_MON_TYPE */
 
diff --git a/kernel/trace/rv/monitors/nomiss/nomiss.c 
b/kernel/trace/rv/monitors/nomiss/nomiss.c
index 8ead8783c29f..ac4d334e757f 100644
--- a/kernel/trace/rv/monitors/nomiss/nomiss.c
+++ b/kernel/trace/rv/monitors/nomiss/nomiss.c
@@ -17,8 +17,8 @@
 
 #define RV_MON_TYPE RV_MON_PER_OBJ
 #define HA_TIMER_TYPE HA_TIMER_WHEEL
-/* The start condition is on sched_switch, it's dangerous to allocate there */
-#define DA_SKIP_AUTO_ALLOC
+/* Allocate storage in sched_setscheduler; sched_switch is too hot to alloc. */
+#define DA_MON_ALLOCATION_STRATEGY DA_ALLOC_MANUAL
 typedef struct sched_dl_entity *monitor_target;
 #include "nomiss.h"
 #include <rv/ha_monitor.h>
@@ -214,7 +214,7 @@ static void handle_sys_enter(void *data, struct pt_regs 
*regs, long id)
        if (p->policy == SCHED_DEADLINE)
                da_reset(EXPAND_ID_TASK(p));
        else if (new_policy == SCHED_DEADLINE)
-               da_create_or_get(EXPAND_ID_TASK(p));
+               da_create_empty_storage(get_entity_id(&p->dl, task_cpu(p), 
DL_TASK));
 }
 
 static void handle_sched_wakeup(void *data, struct task_struct *tsk)
-- 
2.43.0


Reply via email to