From: Wen Yang <[email protected]>
Consolidate per-object DA monitor storage allocation under a
single compile-time selector, replacing the ad-hoc
da_monitor_init_prealloc() API.
Three strategies are provided:
  DA_ALLOC_AUTO (default) - lock-free kmalloc_nolock on the hot path;
                            unbounded capacity. Preserves the existing
                            behaviour for all monitors that do not set
                            DA_MON_ALLOCATION_STRATEGY.
  DA_ALLOC_POOL           - pre-allocated fixed-size pool. Requires the
                            monitor to define DA_MON_POOL_SIZE; enforced
                            with #error. da_prepare_storage() acquires
                            spinlock_t (O(1), irqsave); must be called
                            from task context on PREEMPT_RT where
                            spinlock_t is a sleeping lock.
  DA_ALLOC_MANUAL         - caller pre-inserts storage via
                            da_create_empty_storage() before the first
                            da_handle_start_event(); the framework only
                            links the target field. Useful for monitors
                            that allocate storage from known-safe task
                            context (e.g. a syscall path) and then hand
                            it to a tracepoint handler on the hot path.
da_handle_start_event() and da_handle_start_run_event() both call
da_prepare_storage() which resolves at compile time to the correct
allocation function, so no runtime dispatch is needed.
da_monitor_init_prealloc() is removed; da_monitor_init() selects pool
or kmalloc initialisation internally based on the strategy.
A da_extra_cleanup() hook macro is added: the default is a no-op; a
monitor may define it as a function called by da_monitor_destroy() on
each remaining entry before hash_del_rcu().
nomiss is updated to DA_ALLOC_MANUAL: it calls da_create_empty_storage()
from handle_sys_enter() (the sched_setscheduler syscall path, safe
task context), then da_fill_empty_storage() links the sched_dl_entity
target on the first da_handle_start_run_event() call in
handle_sched_switch().
Suggested-by: Gabriele Monaco <[email protected]>
Signed-off-by: Wen Yang <[email protected]>
---
include/rv/da_monitor.h | 276 +++++++++++++++++++++--
kernel/trace/rv/monitors/nomiss/nomiss.c | 6 +-
2 files changed, 254 insertions(+), 28 deletions(-)
diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h
index 34b8fba9ecd4..eb7fc02ecb8a 100644
--- a/include/rv/da_monitor.h
+++ b/include/rv/da_monitor.h
@@ -14,6 +14,26 @@
#ifndef _RV_DA_MONITOR_H
#define _RV_DA_MONITOR_H
+/*
+ * Allocation strategies for RV_MON_PER_OBJ monitors.
+ *
+ * Define DA_MON_ALLOCATION_STRATEGY before including this header.
+ * DA_ALLOC_AUTO   - lock-free kmalloc on the hot path; unbounded capacity.
+ * DA_ALLOC_POOL   - pre-allocated fixed-size pool; requires DA_MON_POOL_SIZE.
+ *                   da_prepare_storage() acquires spinlock_t (O(1), irqsave);
+ *                   must be called from task context on PREEMPT_RT where
+ *                   spinlock_t is a sleeping lock.
+ * DA_ALLOC_MANUAL - caller inserts storage before da_handle_start_event();
+ *                   the framework only links the target field.
+ */
+#define DA_ALLOC_AUTO 0
+#define DA_ALLOC_POOL 1
+#define DA_ALLOC_MANUAL 2
+
+#ifndef DA_MON_ALLOCATION_STRATEGY
+# define DA_MON_ALLOCATION_STRATEGY DA_ALLOC_AUTO
+#endif
+
#include <rv/automata.h>
#include <linux/rv.h>
#include <linux/stringify.h>
@@ -66,6 +86,19 @@ static struct rv_monitor rv_this;
#define da_monitor_sync_hook()
#endif
+/*
+ * Hook for per-object teardown during da_monitor_destroy().
+ *
+ * Called for each entry still in the hash table when the monitor is
+ * destroyed. Invoked before da_monitor_reset() and hash_del_rcu(), so
+ * it is safe to call ha_cancel_timer_sync() here.
+ *
+ * Define before including this header. Default is a no-op.
+ */
+#ifndef da_extra_cleanup
+#define da_extra_cleanup(da_mon)
+#endif
+
/*
* Type for the target id, default to int but can be overridden.
* A long type can work as hash table key (PER_OBJ) but will be downgraded to
@@ -398,6 +431,16 @@ static inline void da_monitor_destroy(void)
* Functions to define, init and get a per-object monitor.
*/
+/*
+ * DA_MON_POOL_SIZE must be defined before this header is included (directly or
+ * transitively via ha_monitor.h) when DA_ALLOC_POOL is selected. In practice
+ * this means defining it after the monitor's model header (which supplies the
+ * capacity constant) and before the ha_monitor.h include.
+ */
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL && !defined(DA_MON_POOL_SIZE)
+# error "DA_ALLOC_POOL requires DA_MON_POOL_SIZE to be defined before including this header"
+#endif
+
struct da_monitor_storage {
da_id_type id;
monitor_target target;
@@ -495,18 +538,6 @@ static inline da_id_type da_get_id(struct da_monitor *da_mon)
return container_of(da_mon, struct da_monitor_storage, rv.da_mon)->id;
}
-/*
- * da_create_or_get - create the per-object storage if not already there
- *
- * This needs a lookup so should be guarded by RCU, the condition is checked
- * directly in da_create_storage()
- */
-static inline void da_create_or_get(da_id_type id, monitor_target target)
-{
- guard(rcu)();
- da_create_storage(id, target, da_get_monitor(id, target));
-}
-
/*
* da_fill_empty_storage - store the target in a pre-allocated storage
*
@@ -537,15 +568,96 @@ static inline monitor_target da_get_target_by_id(da_id_type id)
return mon_storage->target;
}
+/*
+ * Per-object pool state.
+ *
+ * Zero-initialised by default; storage stays NULL unless the pool is in use.
+ * A monitor opts into the pool at compile time by defining
+ * DA_MON_ALLOCATION_STRATEGY as DA_ALLOC_POOL and DA_MON_POOL_SIZE before
+ * including this header; da_monitor_init() then pre-allocates the pool.
+ *
+ * Because every field is wrapped in this struct and the struct itself is a
+ * per-TU static, each monitor that includes this header gets a completely
+ * independent pool. A kmalloc monitor (e.g. nomiss) and a pool monitor
+ * (e.g. tlob) therefore coexist without any interference.
+ *
+ * da_pool_return_cb runs from softirq (non-PREEMPT_RT) or rcuc kthread
+ * (PREEMPT_RT); spin_lock_irqsave handles both.
+ */
+struct da_per_obj_pool {
+ struct da_monitor_storage *storage; /* non-NULL ⟹ pool mode */
+ struct da_monitor_storage **free; /* kmalloc'd pointer stack */
+ unsigned int free_top;
+ unsigned int capacity; /* total number of slots */
+ spinlock_t lock;
+};
+
+static struct da_per_obj_pool da_pool = {
+ .lock = __SPIN_LOCK_UNLOCKED(da_pool.lock),
+};
+
+static void da_pool_return_cb(struct rcu_head *head)
+{
+ struct da_monitor_storage *ms =
+ container_of(head, struct da_monitor_storage, rcu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&da_pool.lock, flags);
+	if (!WARN_ON_ONCE(!da_pool.free || da_pool.free_top >= da_pool.capacity))
+ da_pool.free[da_pool.free_top++] = ms;
+ spin_unlock_irqrestore(&da_pool.lock, flags);
+}
+
+/*
+ * da_create_or_get_pool - pop a slot and insert it into the hash.
+ *
+ * Returns the new da_monitor on success, NULL if the pool is exhausted, or
+ * the existing da_monitor if a concurrent caller already inserted the same id
+ * (in which case the popped slot is returned to the free stack).
+ *
+ * Must be called inside an RCU read-side critical section (guard(rcu)()).
+ */
+static inline struct da_monitor *
+da_create_or_get_pool(da_id_type id, monitor_target target)
+{
+	struct da_monitor_storage *mon_storage, *existing;
+	unsigned long flags;
+
+	spin_lock_irqsave(&da_pool.lock, flags);
+	if (!da_pool.free_top) {
+		spin_unlock_irqrestore(&da_pool.lock, flags);
+		return NULL;
+	}
+	mon_storage = da_pool.free[--da_pool.free_top];
+	spin_unlock_irqrestore(&da_pool.lock, flags);
+
+	mon_storage->id = id;
+	mon_storage->target = target;
+
+	/*
+	 * A concurrent caller may have inserted the same id since the unlock above;
+	 * return the slot and use theirs. NOTE(review): lookup+insert is not atomic.
+	 */
+	existing = __da_get_mon_storage(id);
+	if (unlikely(existing)) {
+		spin_lock_irqsave(&da_pool.lock, flags);
+		da_pool.free[da_pool.free_top++] = mon_storage;
+		spin_unlock_irqrestore(&da_pool.lock, flags);
+		return &existing->rv.da_mon;
+	}
+	hash_add_rcu(da_monitor_ht, &mon_storage->node, id);
+	return &mon_storage->rv.da_mon;
+}
+
+
/*
* da_destroy_storage - destroy the per-object storage
*
- * The caller is responsible to synchronise writers, either with locks or
- * implicitly. For instance, if da_destroy_storage is called at sched_exit and
- * da_create_storage can never occur after that, it's safe to call this without
- * locks.
- * This function includes an RCU read-side critical section to synchronise
- * against da_monitor_destroy().
+ * Pool mode: removes from hash and returns the slot via call_rcu().
+ * Kmalloc mode: removes from hash and frees via kfree_rcu().
+ *
+ * Includes an RCU read-side critical section to synchronise against
+ * da_monitor_destroy().
*/
static inline void da_destroy_storage(da_id_type id)
{
@@ -558,7 +670,11 @@ static inline void da_destroy_storage(da_id_type id)
return;
da_monitor_reset_hook(&mon_storage->rv.da_mon);
hash_del_rcu(&mon_storage->node);
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+ call_rcu(&mon_storage->rcu, da_pool_return_cb);
+#else
kfree_rcu(mon_storage, rcu);
+#endif
}
static void __da_monitor_reset_all(void (*reset)(struct da_monitor *))
@@ -581,13 +697,87 @@ static inline void da_monitor_reset_state_all(void)
__da_monitor_reset_all(da_monitor_reset_state);
}
+/* Not part of the public API; called by da_monitor_init() for DA_ALLOC_POOL. */
+static inline int __da_monitor_init_pool(unsigned int prealloc_count)
+{
+ da_pool.storage = kcalloc(prealloc_count, sizeof(*da_pool.storage),
+ GFP_KERNEL);
+ if (!da_pool.storage)
+ return -ENOMEM;
+
+ da_pool.free = kmalloc_array(prealloc_count, sizeof(*da_pool.free),
+ GFP_KERNEL);
+ if (!da_pool.free) {
+ kfree(da_pool.storage);
+ da_pool.storage = NULL;
+ return -ENOMEM;
+ }
+
+ da_pool.capacity = prealloc_count;
+ da_pool.free_top = 0;
+ for (unsigned int i = 0; i < prealloc_count; i++)
+ da_pool.free[da_pool.free_top++] = &da_pool.storage[i];
+ return 0;
+}
+
+/*
+ * da_monitor_init - initialise the per-object monitor
+ *
+ * Selects the allocation path at compile time based on DA_MON_ALLOCATION_STRATEGY:
+ * DA_ALLOC_POOL - pre-allocates DA_MON_POOL_SIZE storage slots.
+ * DA_ALLOC_AUTO / DA_ALLOC_MANUAL - initialises the hash table only.
+ */
static inline int da_monitor_init(void)
{
hash_init(da_monitor_ht);
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+ return __da_monitor_init_pool(DA_MON_POOL_SIZE);
+#else
return 0;
+#endif
}
-static inline void da_monitor_destroy(void)
+static inline void da_monitor_destroy_pool(void)
+{
+ struct da_monitor_storage *ms;
+ struct hlist_node *tmp;
+ int bkt;
+
+ /*
+ * Ensure all in-flight tracepoint handlers that may hold a raw pointer
+ * to a pool slot (e.g. tlob_stop_task after its RCU guard exits) have
+ * completed before we begin tearing down the pool. Mirrors the same
+ * call in da_monitor_destroy_kmalloc().
+ */
+ tracepoint_synchronize_unregister();
+
+ /*
+ * Drain any entries that were not stopped before destroy (e.g.
+ * uprobe-started sessions whose stop probe never fired). Call
+ * da_extra_cleanup() before hash_del_rcu() so the hook may safely
+ * call ha_cancel_timer_sync() while the monitor is still reachable.
+ */
+ hash_for_each_safe(da_monitor_ht, bkt, tmp, ms, node) {
+ da_extra_cleanup(&ms->rv.da_mon);
+ hash_del_rcu(&ms->node);
+ call_rcu(&ms->rcu, da_pool_return_cb);
+ }
+
+ /*
+ * rcu_barrier() drains every pending call_rcu() callback, including
+ * both da_pool_return_cb() and any monitor-specific free callbacks
+ * (e.g. tlob_free_rcu) enqueued by da_extra_cleanup().
+ */
+ rcu_barrier();
+ kfree(da_pool.storage);
+ da_pool.storage = NULL;
+ kfree(da_pool.free);
+ da_pool.free = NULL;
+ da_pool.free_top = 0;
+ da_pool.capacity = 0;
+}
+
+static inline void da_monitor_destroy_kmalloc(void)
{
struct da_monitor_storage *mon_storage;
struct hlist_node *tmp;
@@ -607,15 +797,51 @@ static inline void da_monitor_destroy(void)
}
/*
- * Allow the per-object monitors to run allocation manually, necessary if the
- * start condition is in a context problematic for allocation (e.g. scheduling).
- * In such case, if the storage was pre-allocated without a target, set it now.
+ * da_monitor_destroy - tear down the per-object monitor
+ *
+ * DA_ALLOC_POOL: calls tracepoint_synchronize_unregister() to drain any
+ * in-flight handlers, then iterates the hash draining remaining entries via
+ * da_extra_cleanup() + hash_del_rcu() + call_rcu(), then rcu_barrier() to
+ * wait for all pending da_pool_return_cb() callbacks before freeing the pool.
+ * DA_ALLOC_AUTO / DA_ALLOC_MANUAL: drains remaining entries after
+ * tracepoint_synchronize_unregister() + synchronize_rcu().
*/
-#ifdef DA_SKIP_AUTO_ALLOC
-#define da_prepare_storage da_fill_empty_storage
+static inline void da_monitor_destroy(void)
+{
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+ da_monitor_destroy_pool();
#else
+ da_monitor_destroy_kmalloc();
+#endif
+}
+
+/*
+ * da_prepare_storage - obtain (or create) the da_monitor for (id, target)
+ *
+ * The implementation is selected at compile time by DA_MON_ALLOCATION_STRATEGY:
+ *
+ * DA_ALLOC_AUTO   - calls da_create_storage() (lock-free kmalloc_nolock).
+ * DA_ALLOC_POOL   - if an entry already exists, returns it; otherwise pops a
+ *                   slot from the pre-allocated pool and inserts it into the
+ *                   hash. Returns NULL if the pool is exhausted.
+ * DA_ALLOC_MANUAL - caller has already inserted storage via da_create_empty_storage();
+ *                   only fills in the target field if it was left NULL.
+ */
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+static inline struct da_monitor *da_prepare_storage(da_id_type id,
+ monitor_target target,
+ struct da_monitor *da_mon)
+{
+ if (da_mon)
+ return da_mon;
+	/* da_create_or_get_pool() returns the da_monitor directly; no re-lookup needed. */
+ return da_create_or_get_pool(id, target);
+}
+#elif DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_MANUAL
+#define da_prepare_storage da_fill_empty_storage
+#else /* DA_ALLOC_AUTO */
#define da_prepare_storage da_create_storage
-#endif /* DA_SKIP_AUTO_ALLOC */
+#endif
#endif /* RV_MON_TYPE */
diff --git a/kernel/trace/rv/monitors/nomiss/nomiss.c b/kernel/trace/rv/monitors/nomiss/nomiss.c
index 8ead8783c29f..ac4d334e757f 100644
--- a/kernel/trace/rv/monitors/nomiss/nomiss.c
+++ b/kernel/trace/rv/monitors/nomiss/nomiss.c
@@ -17,8 +17,8 @@
#define RV_MON_TYPE RV_MON_PER_OBJ
#define HA_TIMER_TYPE HA_TIMER_WHEEL
-/* The start condition is on sched_switch, it's dangerous to allocate there */
-#define DA_SKIP_AUTO_ALLOC
+/* Allocate storage in sched_setscheduler; sched_switch is too hot to alloc. */
+#define DA_MON_ALLOCATION_STRATEGY DA_ALLOC_MANUAL
typedef struct sched_dl_entity *monitor_target;
#include "nomiss.h"
#include <rv/ha_monitor.h>
@@ -214,7 +214,7 @@ static void handle_sys_enter(void *data, struct pt_regs *regs, long id)
if (p->policy == SCHED_DEADLINE)
da_reset(EXPAND_ID_TASK(p));
else if (new_policy == SCHED_DEADLINE)
- da_create_or_get(EXPAND_ID_TASK(p));
+		da_create_empty_storage(get_entity_id(&p->dl, task_cpu(p), DL_TASK));
}
static void handle_sched_wakeup(void *data, struct task_struct *tsk)
--
2.43.0