housekeeping_update_types() installs new cpumasks via rcu_assign_pointer() and frees the old ones after synchronize_rcu(). A caller that obtains a housekeeping cpumask pointer and uses it outside an RCU read-side critical section can therefore race with an update and access freed memory.
Fix the four call sites:

kernel/sched/core.c (get_nohz_timer_target(), HK_TYPE_KERNEL_NOISE):
guard(rcu)() was taken after the call to housekeeping_cpumask(). Move it
before the call and switch to housekeeping_cpumask_rcu() so that hk_mask
is read inside the RCU read-side critical section. HK_TYPE_KERNEL_NOISE
is updated at runtime by housekeeping_update_types(), so this fix is
required for correctness.

drivers/hv/channel_mgmt.c (init_vp_index(), HK_TYPE_MANAGED_IRQ):
The function stored the raw pointer in a local variable and used it
across GFP_KERNEL allocations, which can sleep, so an RCU read lock
cannot span them. Allocate both cpumask_var_t buffers first, then
snapshot the housekeeping mask under a brief rcu_read_lock() and use the
snapshot throughout. HK_TYPE_MANAGED_IRQ is updated at runtime, so this
fix is required for correctness.

kernel/time/hrtimer.c (get_target_base(), HK_TYPE_TIMER):
cpumask_any_and() against housekeeping_cpumask(HK_TYPE_TIMER) was called
without any lock. Wrap the call in rcu_read_lock()/rcu_read_unlock() and
use housekeeping_cpumask_rcu(). HK_TYPE_TIMER is not changed at runtime
in this series; this is a defensive fix that satisfies the
housekeeping_dereference_check() lockdep annotation and future-proofs
the call site. hrtimers_cpu_dying() is already safe: it runs under the
write side of cpu_hotplug_lock, which housekeeping_dereference_check()
already permits.

arch/arm64/kernel/topology.c (arch_freq_get_on_cpu(), HK_TYPE_TICK):
cpumask_intersects() against housekeeping_cpumask(HK_TYPE_TICK) was
called without any lock. Evaluate it under rcu_read_lock() and store the
boolean result before releasing the lock. HK_TYPE_TICK is not changed at
runtime in this series; this is a defensive fix.

Signed-off-by: Jing Wu <[email protected]> Signed-off-by: Qiliang Yuan <[email protected]> --- arch/arm64/kernel/topology.c | 9 ++++++-- drivers/hv/channel_mgmt.c | 50 ++++++++++++++++++++++++++++++-------------- kernel/sched/core.c | 3 +-- kernel/time/hrtimer.c | 5 ++++- 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index b32f13358fbb1..8f4329b57cea7 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -212,8 +212,13 @@ int arch_freq_get_on_cpu(int cpu) if (!policy) return -EINVAL; - if (!cpumask_intersects(policy->related_cpus, - housekeeping_cpumask(HK_TYPE_TICK))) { + bool no_hk_in_policy; + + rcu_read_lock(); + no_hk_in_policy = !cpumask_intersects(policy->related_cpus, + housekeeping_cpumask_rcu(HK_TYPE_TICK)); + rcu_read_unlock(); + if (no_hk_in_policy) { cpufreq_cpu_put(policy); return -EOPNOTSUPP; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 84eb0a6a0b546..fc5247e92e1b3 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -750,26 +750,43 @@ static void init_vp_index(struct vmbus_channel *channel) { bool perf_chn = hv_is_perf_channel(channel); u32 i, ncpu = num_online_cpus(); - cpumask_var_t available_mask; + cpumask_var_t available_mask, hk_snap; struct cpumask *allocated_mask; - const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); u32 target_cpu; int numa_node; - if (!perf_chn || - !alloc_cpumask_var(&available_mask, GFP_KERNEL) || - cpumask_empty(hk_mask)) { - /* - * If the channel is not a performance critical - * channel, bind it to VMBUS_CONNECT_CPU. - * In case alloc_cpumask_var() fails, bind it to - * VMBUS_CONNECT_CPU. - * If all the cpus are isolated, bind it to - * VMBUS_CONNECT_CPU. 
- */ + if (!perf_chn) { + channel->target_cpu = VMBUS_CONNECT_CPU; + return; + } + + if (!alloc_cpumask_var(&available_mask, GFP_KERNEL)) { + channel->target_cpu = VMBUS_CONNECT_CPU; + hv_set_allocated_cpu(VMBUS_CONNECT_CPU); + return; + } + + /* + * Snapshot HK_TYPE_MANAGED_IRQ cpumask under RCU read lock. + * housekeeping_update_types() frees the old cpumask after + * synchronize_rcu(), so we must not hold the pointer beyond an + * RCU read-side critical section. + */ + if (!alloc_cpumask_var(&hk_snap, GFP_KERNEL)) { + free_cpumask_var(available_mask); + channel->target_cpu = VMBUS_CONNECT_CPU; + hv_set_allocated_cpu(VMBUS_CONNECT_CPU); + return; + } + rcu_read_lock(); + cpumask_copy(hk_snap, housekeeping_cpumask_rcu(HK_TYPE_MANAGED_IRQ)); + rcu_read_unlock(); + + if (cpumask_empty(hk_snap)) { + free_cpumask_var(hk_snap); + free_cpumask_var(available_mask); channel->target_cpu = VMBUS_CONNECT_CPU; - if (perf_chn) - hv_set_allocated_cpu(VMBUS_CONNECT_CPU); + hv_set_allocated_cpu(VMBUS_CONNECT_CPU); return; } @@ -788,7 +805,7 @@ static void init_vp_index(struct vmbus_channel *channel) retry: cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node)); - cpumask_and(available_mask, available_mask, hk_mask); + cpumask_and(available_mask, available_mask, hk_snap); if (cpumask_empty(available_mask)) { /* @@ -809,6 +826,7 @@ static void init_vp_index(struct vmbus_channel *channel) channel->target_cpu = target_cpu; + free_cpumask_var(hk_snap); free_cpumask_var(available_mask); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b8871449d3c69..371b509d92164 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1272,9 +1272,8 @@ int get_nohz_timer_target(void) default_cpu = cpu; } - hk_mask = housekeeping_cpumask(HK_TYPE_KERNEL_NOISE); - guard(rcu)(); + hk_mask = housekeeping_cpumask_rcu(HK_TYPE_KERNEL_NOISE); for_each_domain(cpu, sd) { for_each_cpu_and(i, sched_domain_span(sd), hk_mask) { diff --git a/kernel/time/hrtimer.c 
b/kernel/time/hrtimer.c index 5bd6efe598f0f..18e17a9dad67b 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -242,8 +242,11 @@ static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_ static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, bool pinned) { if (!hrtimer_base_is_online(base)) { - int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); + int cpu; + rcu_read_lock(); + cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask_rcu(HK_TYPE_TIMER)); + rcu_read_unlock(); return &per_cpu(hrtimer_bases, cpu); } -- 2.43.0

