housekeeping_update_types() installs new cpumasks via rcu_assign_pointer()
and frees the old ones after synchronize_rcu().  A caller that
dereferences a housekeeping cpumask outside an RCU read-side critical
section can therefore end up accessing the old, already freed mask.

Fix the four call sites:

kernel/sched/core.c (get_nohz_timer_target, HK_TYPE_KERNEL_NOISE):
  The guard(rcu)() was acquired only after housekeeping_cpumask() had
  already returned the pointer.  Fetch the mask after taking the guard
  and switch to housekeeping_cpumask_rcu() so hk_mask is read inside
  the RCU read-side critical section.  HK_TYPE_KERNEL_NOISE is updated
  at runtime by housekeeping_update_types(), so this fix is required
  for correctness.

drivers/hv/channel_mgmt.c (init_vp_index, HK_TYPE_MANAGED_IRQ):
  The function stored the raw pointer in a local variable and used it
  across GFP_KERNEL allocations (which can sleep, so an RCU read lock
  cannot span them).  Allocate both cpumask_var_t buffers first, then
  snapshot the housekeeping mask under a brief rcu_read_lock() and use
  the snapshot throughout.  HK_TYPE_MANAGED_IRQ is updated at runtime;
  this fix is required for correctness.

kernel/time/hrtimer.c (get_target_base, HK_TYPE_TIMER):
  cpumask_any_and() against housekeeping_cpumask(HK_TYPE_TIMER) was
  called without any lock.  Wrap with rcu_read_lock()/rcu_read_unlock()
  and use housekeeping_cpumask_rcu().  HK_TYPE_TIMER is not changed at
  runtime in this series; this is a defensive, future-proofing fix that
  satisfies the housekeeping_dereference_check() lockdep annotation.
  hrtimers_cpu_dying() is already safe: it runs under the cpu_hotplug_lock
  write side, which housekeeping_dereference_check() already permits.

arch/arm64/kernel/topology.c (arch_freq_get_on_cpu, HK_TYPE_TICK):
  cpumask_intersects() against housekeeping_cpumask(HK_TYPE_TICK) was
  called without any lock.  Evaluate under rcu_read_lock() and store
  the boolean result before releasing the lock.  HK_TYPE_TICK is not
  changed at runtime in this series; this is a defensive fix.

Signed-off-by: Jing Wu <[email protected]>
Signed-off-by: Qiliang Yuan <[email protected]>
---
 arch/arm64/kernel/topology.c |  9 ++++++--
 drivers/hv/channel_mgmt.c    | 50 ++++++++++++++++++++++++++++++--------------
 kernel/sched/core.c          |  3 +--
 kernel/time/hrtimer.c        |  5 ++++-
 4 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index b32f13358fbb1..8f4329b57cea7 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -212,8 +212,13 @@ int arch_freq_get_on_cpu(int cpu)
                        if (!policy)
                                return -EINVAL;
 
-                       if (!cpumask_intersects(policy->related_cpus,
-                                               
housekeeping_cpumask(HK_TYPE_TICK))) {
+                       bool no_hk_in_policy;
+
+                       rcu_read_lock();
+                       no_hk_in_policy = 
!cpumask_intersects(policy->related_cpus,
+                                                             
housekeeping_cpumask_rcu(HK_TYPE_TICK));
+                       rcu_read_unlock();
+                       if (no_hk_in_policy) {
                                cpufreq_cpu_put(policy);
                                return -EOPNOTSUPP;
                        }
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 84eb0a6a0b546..fc5247e92e1b3 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -750,26 +750,43 @@ static void init_vp_index(struct vmbus_channel *channel)
 {
        bool perf_chn = hv_is_perf_channel(channel);
        u32 i, ncpu = num_online_cpus();
-       cpumask_var_t available_mask;
+       cpumask_var_t available_mask, hk_snap;
        struct cpumask *allocated_mask;
-       const struct cpumask *hk_mask = 
housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
        u32 target_cpu;
        int numa_node;
 
-       if (!perf_chn ||
-           !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
-           cpumask_empty(hk_mask)) {
-               /*
-                * If the channel is not a performance critical
-                * channel, bind it to VMBUS_CONNECT_CPU.
-                * In case alloc_cpumask_var() fails, bind it to
-                * VMBUS_CONNECT_CPU.
-                * If all the cpus are isolated, bind it to
-                * VMBUS_CONNECT_CPU.
-                */
+       if (!perf_chn) {
+               channel->target_cpu = VMBUS_CONNECT_CPU;
+               return;
+       }
+
+       if (!alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
+               channel->target_cpu = VMBUS_CONNECT_CPU;
+               hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
+               return;
+       }
+
+       /*
+        * Snapshot HK_TYPE_MANAGED_IRQ cpumask under RCU read lock.
+        * housekeeping_update_types() frees the old cpumask after
+        * synchronize_rcu(), so we must not hold the pointer beyond an
+        * RCU read-side critical section.
+        */
+       if (!alloc_cpumask_var(&hk_snap, GFP_KERNEL)) {
+               free_cpumask_var(available_mask);
+               channel->target_cpu = VMBUS_CONNECT_CPU;
+               hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
+               return;
+       }
+       rcu_read_lock();
+       cpumask_copy(hk_snap, housekeeping_cpumask_rcu(HK_TYPE_MANAGED_IRQ));
+       rcu_read_unlock();
+
+       if (cpumask_empty(hk_snap)) {
+               free_cpumask_var(hk_snap);
+               free_cpumask_var(available_mask);
                channel->target_cpu = VMBUS_CONNECT_CPU;
-               if (perf_chn)
-                       hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
+               hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
                return;
        }
 
@@ -788,7 +805,7 @@ static void init_vp_index(struct vmbus_channel *channel)
 
 retry:
                cpumask_xor(available_mask, allocated_mask, 
cpumask_of_node(numa_node));
-               cpumask_and(available_mask, available_mask, hk_mask);
+               cpumask_and(available_mask, available_mask, hk_snap);
 
                if (cpumask_empty(available_mask)) {
                        /*
@@ -809,6 +826,7 @@ static void init_vp_index(struct vmbus_channel *channel)
 
        channel->target_cpu = target_cpu;
 
+       free_cpumask_var(hk_snap);
        free_cpumask_var(available_mask);
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8871449d3c69..371b509d92164 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1272,9 +1272,8 @@ int get_nohz_timer_target(void)
                default_cpu = cpu;
        }
 
-       hk_mask = housekeeping_cpumask(HK_TYPE_KERNEL_NOISE);
-
        guard(rcu)();
+       hk_mask = housekeeping_cpumask_rcu(HK_TYPE_KERNEL_NOISE);
 
        for_each_domain(cpu, sd) {
                for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5bd6efe598f0f..18e17a9dad67b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -242,8 +242,11 @@ static bool hrtimer_suitable_target(struct hrtimer *timer, 
struct hrtimer_clock_
 static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base 
*base, bool pinned)
 {
        if (!hrtimer_base_is_online(base)) {
-               int cpu = cpumask_any_and(cpu_online_mask, 
housekeeping_cpumask(HK_TYPE_TIMER));
+               int cpu;
 
+               rcu_read_lock();
+               cpu = cpumask_any_and(cpu_online_mask, 
housekeeping_cpumask_rcu(HK_TYPE_TIMER));
+               rcu_read_unlock();
                return &per_cpu(hrtimer_bases, cpu);
        }
 

-- 
2.43.0


Reply via email to