Re: [PATCH next v1 1/3] printk: track/limit recursion
On Tue 2021-03-23 22:32:00, John Ogness wrote: > On 2021-03-22, Petr Mladek wrote: > > On Wed 2021-03-17 00:33:24, John Ogness wrote: > >> Track printk() recursion and limit it to 3 levels per-CPU and per-context. > > > >> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c > >> index 2f829fbf0a13..c666e3e43f0c 100644 > >> --- a/kernel/printk/printk.c > >> +++ b/kernel/printk/printk.c > >> +/* Return a pointer to the dedicated counter for the CPU+context of the > >> caller. */ > >> +static char *printk_recursion_counter(void) > >> +{ > >> + int ctx = 0; > >> + > >> +#ifdef CONFIG_PRINTK_NMI > >> + if (in_nmi()) > >> + ctx = 1; > >> +#endif > >> + if (!printk_percpu_data_ready()) > >> + return _count_early[ctx]; > >> + return &((*this_cpu_ptr(_count))[ctx]); > >> +} > > > > It is not a big deal. But using an array for two contexts looks strange > > especially when only one is used on some architectures. > > Also &((*this_cpu_ptr(_count))[ctx]) is quite tricky ;-) > > > > What do you think about the following, please? > > > > static DEFINE_PER_CPU(u8 printk_count); > > static u8 printk_count_early; > > > > #ifdef CONFIG_HAVE_NMI > > static DEFINE_PER_CPU(u8 printk_count_nmi); > > static u8 printk_count_nmi_early; > > #endif > > > > static u8 *printk_recursion_counter(void) > > { > > if (IS_ENABLED(CONFIG_HAVE_NMI) && in_nmi()) { > > if (printk_cpu_data_ready()) > > return this_cpu_ptr(_count_nmi); > > return printk_count_nmi_early; > > } > > > > if (printk_cpu_data_ready()) > > return this_cpu_ptr(_count); > > return printk_count_early; > > } > > I can split it into explicit variables. But is the use of the IS_ENABLED > macro preferred over ifdef? I would prefer: > > static u8 *printk_recursion_counter(void) > { > #ifdef CONFIG_HAVE_NMI > if (in_nmi()) { > if (printk_cpu_data_ready()) > return this_cpu_ptr(_count_nmi); > return printk_count_nmi_early; > } > #endif > if (printk_cpu_data_ready()) > return this_cpu_ptr(_count); > return printk_count_early; > } > > Since @printk_count_nmi and @printk_count_nmi_early would not exist, I > would prefer the pre-processor removes that code block rather than > relying on compiler optimization. Feel free to use #ifdef. Best Regards, Petr
Re: [PATCH next v1 1/3] printk: track/limit recursion
On 2021-03-22, Petr Mladek wrote: > On Wed 2021-03-17 00:33:24, John Ogness wrote: >> Track printk() recursion and limit it to 3 levels per-CPU and per-context. > > Please, explain why it is added. I mean that it will > allow remove printk_safe that provides recursion protection at the > moment. OK. >> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c >> index 2f829fbf0a13..c666e3e43f0c 100644 >> --- a/kernel/printk/printk.c >> +++ b/kernel/printk/printk.c >> @@ -1940,6 +1940,71 @@ static void call_console_drivers(const char >> *ext_text, size_t ext_len, >> } >> } >> >> +/* >> + * Recursion is tracked separately on each CPU. If NMIs are supported, an >> + * additional NMI context per CPU is also separately tracked. Until per-CPU >> + * is available, a separate "early tracking" is performed. >> + */ >> +#ifdef CONFIG_PRINTK_NMI > > CONFIG_PRINTK_NMI is a shortcut for CONFIG_PRINTK && CONFIG_HAVE_NMI. > It should be possible to use CONFIG_HAVE_NMI here because this should > be in section where CONFIG_PRINTK is defined. > > This would make sense if it allows to remove CONFIG_PRINTK_NMI > entirely. IMHO, it would be nice to remove one layer in the > config options of possible. OK. I will remove CONFIG_PRINTK_NMI for v2. >> +#define PRINTK_CTX_NUM 2 >> +#else >> +#define PRINTK_CTX_NUM 1 >> +#endif >> +static DEFINE_PER_CPU(char [PRINTK_CTX_NUM], printk_count); >> +static char printk_count_early[PRINTK_CTX_NUM]; >> + >> +/* >> + * Recursion is limited to keep the output sane. printk() should not require >> + * more than 1 level of recursion (allowing, for example, printk() to >> trigger >> + * a WARN), but a higher value is used in case some printk-internal errors >> + * exist, such as the ringbuffer validation checks failing. >> + */ >> +#define PRINTK_MAX_RECURSION 3 >> + >> +/* Return a pointer to the dedicated counter for the CPU+context of the >> caller. */ >> +static char *printk_recursion_counter(void) >> +{ >> +int ctx = 0; >> + >> +#ifdef CONFIG_PRINTK_NMI >> +if (in_nmi()) >> +ctx = 1; >> +#endif >> +if (!printk_percpu_data_ready()) >> +return _count_early[ctx]; >> +return &((*this_cpu_ptr(_count))[ctx]); >> +} > > It is not a big deal. But using an array for two contexts looks strange > especially when only one is used on some architectures. > Also &((*this_cpu_ptr(_count))[ctx]) is quite tricky ;-) > > What do you think about the following, please? > > static DEFINE_PER_CPU(u8 printk_count); > static u8 printk_count_early; > > #ifdef CONFIG_HAVE_NMI > static DEFINE_PER_CPU(u8 printk_count_nmi); > static u8 printk_count_nmi_early; > #endif > > static u8 *printk_recursion_counter(void) > { > if (IS_ENABLED(CONFIG_HAVE_NMI) && in_nmi()) { > if (printk_cpu_data_ready()) > return this_cpu_ptr(_count_nmi); > return printk_count_nmi_early; > } > > if (printk_cpu_data_ready()) > return this_cpu_ptr(_count); > return printk_count_early; > } I can split it into explicit variables. But is the use of the IS_ENABLED macro preferred over ifdef? I would prefer: static u8 *printk_recursion_counter(void) { #ifdef CONFIG_HAVE_NMI if (in_nmi()) { if (printk_cpu_data_ready()) return this_cpu_ptr(_count_nmi); return printk_count_nmi_early; } #endif if (printk_cpu_data_ready()) return this_cpu_ptr(_count); return printk_count_early; } Since @printk_count_nmi and @printk_count_nmi_early would not exist, I would prefer the pre-processor removes that code block rather than relying on compiler optimization. John Ogness
Re: [PATCH next v1 1/3] printk: track/limit recursion
On Mon 2021-03-22 20:13:51, Sergey Senozhatsky wrote: > On (21/03/22 11:53), John Ogness wrote: > > On 2021-03-21, Sergey Senozhatsky wrote: > > >> @@ -2055,6 +2122,9 @@ int vprintk_store(int facility, int level, > > >> */ > > >> ts_nsec = local_clock(); > > >> > > >> +if (!printk_enter_irqsave()) > > >> +return 0; > > > > > > I guess it can be interesting to somehow signal us that we had > > > printk() recursion overflow, and how many messages we lost. > > > > Honestly, if we hit 3 levels of recursion, we are probably dealing with > > an infinite recursion issue. > > I tend to agree. > > > I do not see the value of counting the overflows in that case. The > > logged messages at that recursion level would ben enough to point > > us to the problem. > > > > > 3 levels of recursion seem like reasonable limit, but I maybe wouldn't > > > mind one extra level. > > > > With 3 levels, we will see all the messages of: > > > > printk -> WARN_ON -> WARN_ON -> WARN_ON This is the worst case. Many messages are just a single line or we use WARN_ON_ONCE. > Well, not necessarily this simple. > > printk > vsprintf > handle_foo_specifier >printk > call_console_drivers > timekeeping > printk >vsprintf > > We saw in the past that enabling CONFIG_DEBUG_OBJECTS (if I'm not > mistaken) can add quite a bit of extra printk recursion paths. > > We also have other CONFIG_DEBUG_* config options that can pop up as > recursive printk-s here and there. For instance, from > vsprintf::foo_specifier() > where we escape from printk() to various kernel subsystems: net, block, > etc. > > Maybe sometimes on level 3+ we'll see something interesting, > but I've no strong opinion on this. Honestly, my first reaction was the same as Sergey's. But John has a point that too many levels might be hard to read. I think that 3 levels are a good start. We could always change it when we hit a problem in practice. > > Keep in mind that each additional level causes the reading of the logs > > to be significantly more complex. Each level increases the output > > exponentially: > > Yes, I realize that. That's why I suggested that maybe recursive > printk-s can have some special extra prefix. Recursive printk-s > will interleave with whatever is being printed on this_cpu, so > prefix might be helpful. This is an interesting area, definitely. I am not sure if it is worth it though. I would keep it simple and cut output on 3rd level for now. We could always improve it when we hit a problem in the real life. Best Regards, Petr
Re: [PATCH next v1 1/3] printk: track/limit recursion
On Wed 2021-03-17 00:33:24, John Ogness wrote: > Track printk() recursion and limit it to 3 levels per-CPU and per-context. Please, explain why it is added. I mean that it will allow remove printk_safe that provides recursion protection at the moment. > Signed-off-by: John Ogness > --- > kernel/printk/printk.c | 80 -- > 1 file changed, 77 insertions(+), 3 deletions(-) > > diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c > index 2f829fbf0a13..c666e3e43f0c 100644 > --- a/kernel/printk/printk.c > +++ b/kernel/printk/printk.c > @@ -1940,6 +1940,71 @@ static void call_console_drivers(const char *ext_text, > size_t ext_len, > } > } > > +/* > + * Recursion is tracked separately on each CPU. If NMIs are supported, an > + * additional NMI context per CPU is also separately tracked. Until per-CPU > + * is available, a separate "early tracking" is performed. > + */ > +#ifdef CONFIG_PRINTK_NMI CONFIG_PRINTK_NMI is a shortcut for CONFIG_PRINTK && CONFIG_HAVE_NMI. It should be possible to use CONFIG_HAVE_NMI here because this should be in section where CONFIG_PRINTK is defined. This would make sense if it allows to remove CONFIG_PRINTK_NMI entirely. IMHO, it would be nice to remove one layer in the config options of possible. > +#define PRINTK_CTX_NUM 2 > +#else > +#define PRINTK_CTX_NUM 1 > +#endif > +static DEFINE_PER_CPU(char [PRINTK_CTX_NUM], printk_count); > +static char printk_count_early[PRINTK_CTX_NUM]; > + > +/* > + * Recursion is limited to keep the output sane. printk() should not require > + * more than 1 level of recursion (allowing, for example, printk() to trigger > + * a WARN), but a higher value is used in case some printk-internal errors > + * exist, such as the ringbuffer validation checks failing. > + */ > +#define PRINTK_MAX_RECURSION 3 > + > +/* Return a pointer to the dedicated counter for the CPU+context of the > caller. */ > +static char *printk_recursion_counter(void) > +{ > + int ctx = 0; > + > +#ifdef CONFIG_PRINTK_NMI > + if (in_nmi()) > + ctx = 1; > +#endif > + if (!printk_percpu_data_ready()) > + return _count_early[ctx]; > + return &((*this_cpu_ptr(_count))[ctx]); > +} It is not a big deal. But using an array for two contexts looks strange especially when only one is used on some architectures. Also &((*this_cpu_ptr(_count))[ctx]) is quite tricky ;-) What do you think about the following, please? static DEFINE_PER_CPU(u8 printk_count); static u8 printk_count_early; #ifdef CONFIG_HAVE_NMI static DEFINE_PER_CPU(u8 printk_count_nmi); static u8 printk_count_nmi_early; #endif static u8 *printk_recursion_counter(void) { if (IS_ENABLED(CONFIG_HAVE_NMI) && in_nmi()) { if (printk_cpu_data_ready()) return this_cpu_ptr(_count_nmi); return printk_count_nmi_early; } if (printk_cpu_data_ready()) return this_cpu_ptr(_count); return printk_count_early; } Otherwise, it looks good to me. I like the simplicity. Best Regards, Petr
Re: [PATCH next v1 1/3] printk: track/limit recursion
On (21/03/22 11:53), John Ogness wrote: > On 2021-03-21, Sergey Senozhatsky wrote: > >> @@ -2055,6 +2122,9 @@ int vprintk_store(int facility, int level, > >> */ > >>ts_nsec = local_clock(); > >> > >> + if (!printk_enter_irqsave()) > >> + return 0; > > > > I guess it can be interesting to somehow signal us that we had > > printk() recursion overflow, and how many messages we lost. > > Honestly, if we hit 3 levels of recursion, we are probably dealing with > an infinite recursion issue. I tend to agree. > I do not see the value of counting the overflows in that case. The > logged messages at that recursion level would ben enough to point > us to the problem. > > > 3 levels of recursion seem like reasonable limit, but I maybe wouldn't > > mind one extra level. > > With 3 levels, we will see all the messages of: > > printk -> WARN_ON -> WARN_ON -> WARN_ON Well, not necessarily this simple. printk vsprintf handle_foo_specifier printk call_console_drivers timekeeping printk vsprintf We saw in the past that enabling CONFIG_DEBUG_OBJECTS (if I'm not mistaken) can add quite a bit of extra printk recursion paths. We also have other CONFIG_DEBUG_* config options that can pop up as recursive printk-s here and there. For instance, from vsprintf::foo_specifier() where we escape from printk() to various kernel subsystems: net, block, etc. Maybe sometimes on level 3+ we'll see something interesting, but I've no strong opinion on this. > Keep in mind that each additional level causes the reading of the logs > to be significantly more complex. Each level increases the output > exponentially: Yes, I realize that. That's why I suggested that maybe recursive printk-s can have some special extra prefix. Recursive printk-s will interleave with whatever is being printed on this_cpu, so prefix might be helpful. -ss
Re: [PATCH next v1 1/3] printk: track/limit recursion
On 2021-03-21, Sergey Senozhatsky wrote: >> @@ -2055,6 +2122,9 @@ int vprintk_store(int facility, int level, >> */ >> ts_nsec = local_clock(); >> >> +if (!printk_enter_irqsave()) >> +return 0; > > I guess it can be interesting to somehow signal us that we had > printk() recursion overflow, and how many messages we lost. Honestly, if we hit 3 levels of recursion, we are probably dealing with an infinite recursion issue. I do not see the value of counting the overflows in that case. The logged messages at that recursion level would ben enough to point us to the problem. > 3 levels of recursion seem like reasonable limit, but I maybe wouldn't > mind one extra level. With 3 levels, we will see all the messages of: printk -> WARN_ON -> WARN_ON -> WARN_ON Keep in mind that each additional level causes the reading of the logs to be significantly more complex. Each level increases the output exponentially: for every line1 in 1st_WARN_ON { for every line2 in 2nd_WARN_ON { for every line3 in 3rd_WARN_ON { print $line3 } print $line2 } print $line1 } print $line0 IMHO 2 levels is enough because we should _never_ hit 2 levels of recursion. If we do, the log output at that second level should be enough to point to the bug. IMHO printing a third level just makes things unnecessarily difficult to read. (My series uses 3 levels as a compromise on my part. I would prefer reducing it to 2.) > And maybe we could add some sort of message prefix for high levels of > recursion nesting (levels 3+), so that things should not be normal > will be on the radars and, possibly, will be reported. I considered this, but am very hesitant to change the output format. Also, the CUT_HERE usage (combined with PRINTK_CALLER) seem to be enough. John Ogness
Re: [PATCH next v1 1/3] printk: track/limit recursion
On (21/03/17 00:33), John Ogness wrote: [..] > static inline void printk_delay(void) > @@ -2040,11 +2105,13 @@ int vprintk_store(int facility, int level, > struct prb_reserved_entry e; > enum log_flags lflags = 0; > struct printk_record r; > + unsigned long irqflags; > u16 trunc_msg_len = 0; > char prefix_buf[8]; > u16 reserve_size; > va_list args2; > u16 text_len; > + int ret = 0; > u64 ts_nsec; > > /* > @@ -2055,6 +2122,9 @@ int vprintk_store(int facility, int level, >*/ > ts_nsec = local_clock(); > > + if (!printk_enter_irqsave()) > + return 0; I guess it can be interesting to somehow signal us that we had printk() recursion overflow, and how many messages we lost. 3 levels of recursion seem like reasonable limit, but I maybe wouldn't mind one extra level. And maybe we could add some sort of message prefix for high levels of recursion nesting (levels 3+), so that things should not be normal will be on the radars and, possibly, will be reported. -ss
[PATCH next v1 1/3] printk: track/limit recursion
Track printk() recursion and limit it to 3 levels per-CPU and per-context. Signed-off-by: John Ogness --- kernel/printk/printk.c | 80 -- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 2f829fbf0a13..c666e3e43f0c 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1940,6 +1940,71 @@ static void call_console_drivers(const char *ext_text, size_t ext_len, } } +/* + * Recursion is tracked separately on each CPU. If NMIs are supported, an + * additional NMI context per CPU is also separately tracked. Until per-CPU + * is available, a separate "early tracking" is performed. + */ +#ifdef CONFIG_PRINTK_NMI +#define PRINTK_CTX_NUM 2 +#else +#define PRINTK_CTX_NUM 1 +#endif +static DEFINE_PER_CPU(char [PRINTK_CTX_NUM], printk_count); +static char printk_count_early[PRINTK_CTX_NUM]; + +/* + * Recursion is limited to keep the output sane. printk() should not require + * more than 1 level of recursion (allowing, for example, printk() to trigger + * a WARN), but a higher value is used in case some printk-internal errors + * exist, such as the ringbuffer validation checks failing. + */ +#define PRINTK_MAX_RECURSION 3 + +/* Return a pointer to the dedicated counter for the CPU+context of the caller. */ +static char *printk_recursion_counter(void) +{ + int ctx = 0; + +#ifdef CONFIG_PRINTK_NMI + if (in_nmi()) + ctx = 1; +#endif + if (!printk_percpu_data_ready()) + return _count_early[ctx]; + return &((*this_cpu_ptr(_count))[ctx]); +} + +/* + * Enter recursion tracking. Interrupts are disabled to simplify tracking. + * The caller must check the return value to see if the recursion is allowed. + * On failure, interrupts are not disabled. + */ +static bool printk_enter_irqsave(unsigned long *flags) +{ + char *count; + + local_irq_save(*flags); + count = printk_recursion_counter(); + if (*count > PRINTK_MAX_RECURSION) { + local_irq_restore(*flags); + return false; + } + (*count)++; + + return true; +} + +/* Exit recursion tracking, restoring interrupts. */ +static void printk_exit_irqrestore(unsigned long flags) +{ + char *count; + + count = printk_recursion_counter(); + (*count)--; + local_irq_restore(flags); +} + int printk_delay_msec __read_mostly; static inline void printk_delay(void) @@ -2040,11 +2105,13 @@ int vprintk_store(int facility, int level, struct prb_reserved_entry e; enum log_flags lflags = 0; struct printk_record r; + unsigned long irqflags; u16 trunc_msg_len = 0; char prefix_buf[8]; u16 reserve_size; va_list args2; u16 text_len; + int ret = 0; u64 ts_nsec; /* @@ -2055,6 +2122,9 @@ int vprintk_store(int facility, int level, */ ts_nsec = local_clock(); + if (!printk_enter_irqsave()) + return 0; + /* * The sprintf needs to come first since the syslog prefix might be * passed in as a parameter. An extra byte must be reserved so that @@ -2092,7 +2162,8 @@ int vprintk_store(int facility, int level, prb_commit(); } - return text_len; + ret = text_len; + goto out; } } @@ -2108,7 +2179,7 @@ int vprintk_store(int facility, int level, prb_rec_init_wr(, reserve_size + trunc_msg_len); if (!prb_reserve(, prb, )) - return 0; + goto out; } /* fill message */ @@ -2130,7 +2201,10 @@ int vprintk_store(int facility, int level, else prb_final_commit(); - return (text_len + trunc_msg_len); + ret = text_len + trunc_msg_len; +out: + printk_exit_irqrestore(irqflags); + return ret; } asmlinkage int vprintk_emit(int facility, int level, -- 2.20.1