On Mon, Apr 05, 2021 at 07:42:03PM -0400, Waiman Long wrote:
> The handling of sysrq key can be activated by echoing the key to
> /proc/sysrq-trigger or via the magic key sequence typed into a terminal
> that is connected to the system in some way (serial, USB or other means).
> In the former case, the handling is done in a user context. In the
> latter case, it is likely to be in an interrupt context.

> [ 7809.796281]  </NMI>
> [ 7809.796282]  _raw_spin_lock_irqsave+0x32/0x40
> [ 7809.796283]  print_cpu+0x261/0x7c0
> [ 7809.796283]  sysrq_sched_debug_show+0x34/0x50
> [ 7809.796284]  sysrq_handle_showstate+0xc/0x20
> [ 7809.796284]  __handle_sysrq.cold.11+0x48/0xfb
> [ 7809.796285]  write_sysrq_trigger+0x2b/0x30
> [ 7809.796285]  proc_reg_write+0x39/0x60
> [ 7809.796286]  vfs_write+0xa5/0x1a0
> [ 7809.796286]  ksys_write+0x4f/0xb0
> [ 7809.796287]  do_syscall_64+0x5b/0x1a0
> [ 7809.796287]  entry_SYSCALL_64_after_hwframe+0x65/0xca
> [ 7809.796288] RIP: 0033:0x7fabe4ceb648
> 
> The purpose of sched_debug_lock is to serialize the use of the global
> group_path[] buffer in print_cpu(). The rest of the printk calls don't
> need serialization from sched_debug_lock.

> The print_cpu() function has two callers - sched_debug_show() and
> sysrq_sched_debug_show(). 

So what idiot is doing sysrq and that proc file at the same time? Why is
it a problem now?

> @@ -470,16 +468,49 @@ static void print_cfs_group_stats(struct seq_file *m, 
> int cpu, struct task_group
>  #endif
>  
>  #ifdef CONFIG_CGROUP_SCHED
> +static DEFINE_SPINLOCK(sched_debug_lock);
>  static char group_path[PATH_MAX];
> +static enum {
> +     TOKEN_NONE,
> +     TOKEN_ACQUIRED,
> +     TOKEN_NA        /* Not applicable */
> +} console_token = TOKEN_ACQUIRED;

> +/*
> + * All the print_cpu() callers from sched_debug_show() will be allowed
> + * to contend for sched_debug_lock and use group_path[] as their SEQ_printf()
> + * calls will be much faster. However only one print_cpu() caller from
> + * sysrq_sched_debug_show() which outputs to the console will be allowed
> + * to use group_path[]. Another parallel console writer will have to use
> + * a shorter stack buffer instead. Since the console output will be garbled
> + * anyway, truncation of some cgroup paths shouldn't be a big issue.
> + */
> +#define SEQ_printf_task_group_path(m, tg, fmt...)                    \
> +{                                                                    \
> +     unsigned long flags;                                            \
> +     int token = m ? TOKEN_NA                                        \
> +                   : xchg_acquire(&console_token, TOKEN_NONE);       \
> +                                                                     \
> +     if (token == TOKEN_NONE) {                                      \
> +             char buf[128];                                          \
> +             task_group_path(tg, buf, sizeof(buf));                  \
> +             SEQ_printf(m, fmt, buf);                                \
> +     } else {                                                        \
> +             spin_lock_irqsave(&sched_debug_lock, flags);            \
> +             task_group_path(tg, group_path, sizeof(group_path));    \
> +             SEQ_printf(m, fmt, group_path);                         \
> +             spin_unlock_irqrestore(&sched_debug_lock, flags);       \
> +             if (token == TOKEN_ACQUIRED)                            \
> +                     smp_store_release(&console_token, token);       \
> +     }                                                               \
>  }

This is disgusting... you have an open-coded test-and-set lock like
thing *AND* a spinlock, what gives?


What's wrong with something simple like this?

---
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4b49cc2af5c4..2ac2977f3b96 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -8,8 +8,6 @@
  */
 #include "sched.h"
 
-static DEFINE_SPINLOCK(sched_debug_lock);
-
 /*
  * This allows printing both to /proc/sched_debug and
  * to the console
@@ -470,6 +468,7 @@ static void print_cfs_group_stats(struct seq_file *m, int 
cpu, struct task_group
 #endif
 
 #ifdef CONFIG_CGROUP_SCHED
+static DEFINE_SPINLOCK(group_path_lock);
 static char group_path[PATH_MAX];
 
 static char *task_group_path(struct task_group *tg)
@@ -481,6 +480,22 @@ static char *task_group_path(struct task_group *tg)
 
        return group_path;
 }
+
+#define SEQ_printf_task_group_path(m, tg)                              \
+do {                                                                   \
+       if (spin_trylock(&group_path_lock)) {                           \
+               task_group_path(tg, group_path, sizeof(group_path));    \
+               SEQ_printf(m, "%s", group_path);                        \
+               spin_unlock(&group_path_lock);                          \
+       } else {                                                        \
+               SEQ_printf(m, "looser!");                               \
+       }
+} while (0)
+
+#else
+
+#define SEQ_printf_task_group_path(m, tg) do { } while (0)
+
 #endif
 
 static void
@@ -505,9 +520,8 @@ print_task(struct seq_file *m, struct rq *rq, struct 
task_struct *p)
 #ifdef CONFIG_NUMA_BALANCING
        SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
-#ifdef CONFIG_CGROUP_SCHED
-       SEQ_printf(m, " %s", task_group_path(task_group(p)));
-#endif
+       SEQ_printf(m, " ");
+       SEQ_printf_task_group_path(m, task_group(p));
 
        SEQ_printf(m, "\n");
 }
@@ -541,13 +555,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct 
cfs_rq *cfs_rq)
        struct sched_entity *last;
        unsigned long flags;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
        SEQ_printf(m, "\n");
-       SEQ_printf(m, "cfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
-#else
+       SEQ_printf(m, "cfs_rq[%d]:", cpu);
+       SEQ_printf_task_group_path(m, cfs_rq->tg);
        SEQ_printf(m, "\n");
-       SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
-#endif
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
                        SPLIT_NS(cfs_rq->exec_clock));
 
@@ -612,13 +623,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct 
cfs_rq *cfs_rq)
 
 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
-#ifdef CONFIG_RT_GROUP_SCHED
        SEQ_printf(m, "\n");
-       SEQ_printf(m, "rt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
-#else
+       SEQ_printf(m, "rt_rq[%d]:", cpu);
+       SEQ_printf_task_group_path(m, rt_rq->tg);
        SEQ_printf(m, "\n");
-       SEQ_printf(m, "rt_rq[%d]:\n", cpu);
-#endif
 
 #define P(x) \
        SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
@@ -666,7 +674,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq 
*dl_rq)
 static void print_cpu(struct seq_file *m, int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
-       unsigned long flags;
 
 #ifdef CONFIG_X86
        {
@@ -717,13 +724,11 @@ do {                                                      
                \
        }
 #undef P
 
-       spin_lock_irqsave(&sched_debug_lock, flags);
        print_cfs_stats(m, cpu);
        print_rt_stats(m, cpu);
        print_dl_stats(m, cpu);
 
        print_rq(m, rq, cpu);
-       spin_unlock_irqrestore(&sched_debug_lock, flags);
        SEQ_printf(m, "\n");
 }
 

Reply via email to