From: Joel Fernandes <[email protected]>

Move NMI nesting tracking from the preempt_count bits to a separate per-CPU
counter (nmi_nesting). This frees up the NMI bits in preempt_count,
allowing those bits to be repurposed for other uses. It also has the benefit
of tracking nesting more than 16 levels deep if there is ever a need.

This reduces the number of bits preempt_count uses for NMI tracking:
NMI_BITS goes from 3 to 1, and the remaining bit is used only to detect
whether we are currently in an NMI.

Suggested-by: Boqun Feng <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
Signed-off-by: Lyude Paul <[email protected]>
Signed-off-by: Boqun Feng <[email protected]>
Link: https://patch.msgid.link/[email protected]
---
 include/linux/hardirq.h | 16 ++++++++++++----
 include/linux/preempt.h | 13 +++++++++----
 kernel/softirq.c        |  2 ++
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d57cab4d4c06..cc06bda52c3e 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -10,6 +10,8 @@
 #include <linux/vtime.h>
 #include <asm/hardirq.h>
 
+DECLARE_PER_CPU(unsigned int, nmi_nesting);
+
 extern void synchronize_irq(unsigned int irq);
 extern bool synchronize_hardirq(unsigned int irq);
 
@@ -102,14 +104,16 @@ void irq_exit_rcu(void);
  */
 
 /*
- * nmi_enter() can nest up to 15 times; see NMI_BITS.
+ * nmi_enter() can nest - nesting is tracked in a per-CPU counter.
  */
 #define __nmi_enter()                                          \
        do {                                                    \
                lockdep_off();                                  \
                arch_nmi_enter();                               \
-               BUG_ON(in_nmi() == NMI_MASK);                   \
-               __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);       \
+               BUG_ON(__this_cpu_read(nmi_nesting) == UINT_MAX);       \
+               __this_cpu_inc(nmi_nesting);                    \
+               __preempt_count_add(HARDIRQ_OFFSET);            \
+               preempt_count_set(preempt_count() | NMI_MASK);  \
        } while (0)
 
 #define nmi_enter()                                            \
@@ -124,8 +128,12 @@ void irq_exit_rcu(void);
 
 #define __nmi_exit()                                           \
        do {                                                    \
+               unsigned int nesting;                           \
                BUG_ON(!in_nmi());                              \
-               __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);       \
+               __preempt_count_sub(HARDIRQ_OFFSET);            \
+               nesting = __this_cpu_dec_return(nmi_nesting);   \
+               if (!nesting)                                   \
+                       __preempt_count_sub(NMI_OFFSET);        \
                arch_nmi_exit();                                \
                lockdep_on();                                   \
        } while (0)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index f07e7f37f3ca..e2d3079d3f5f 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -18,6 +18,8 @@
  * - bits 0-7 are the preemption count (max preemption depth: 256)
  * - bits 8-15 are the softirq count (max # of softirqs: 256)
  * - bits 16-23 are the hardirq disable count (max # of hardirq disable: 256)
+ * - bits 24-27 are the hardirq count (max # of hardirqs: 16)
+ * - bit 28 is the NMI flag (no nesting count, tracked separately)
  *
  * The hardirq count could in theory be the same as the number of
  * interrupts in the system, but we run all interrupt handlers with
@@ -25,18 +27,21 @@
  * there are a few palaeontologic drivers which reenable interrupts in
  * the handler, so we need more than one bit here.
  *
+ * NMI nesting depth is tracked in a separate per-CPU variable
+ * (nmi_nesting) to save bits in preempt_count.
+ *
  *         PREEMPT_MASK:       0x000000ff
  *         SOFTIRQ_MASK:       0x0000ff00
  * HARDIRQ_DISABLE_MASK:       0x00ff0000
- *         HARDIRQ_MASK:       0x07000000
- *             NMI_MASK:       0x38000000
+ *         HARDIRQ_MASK:       0x0f000000
+ *             NMI_MASK:       0x10000000
  * PREEMPT_NEED_RESCHED:       0x80000000
  */
 #define PREEMPT_BITS   8
 #define SOFTIRQ_BITS   8
 #define HARDIRQ_DISABLE_BITS   8
-#define HARDIRQ_BITS   3
-#define NMI_BITS       3
+#define HARDIRQ_BITS   4
+#define NMI_BITS       1
 
 #define PREEMPT_SHIFT  0
 #define SOFTIRQ_SHIFT  (PREEMPT_SHIFT + PREEMPT_BITS)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 4425d8dce44b..10af5ed859e7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -88,6 +88,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
 EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
 #endif
 
+DEFINE_PER_CPU(unsigned int, nmi_nesting);
+
 /*
  * SOFTIRQ_OFFSET usage:
  *
-- 
2.50.1 (Apple Git-155)


Reply via email to