Re: [PATCH v2] pseries/drmem: don't cache node id in drmem_lmb struct

2020-07-29 Thread Laurent Dufour

Hi Scott,

Le 28/07/2020 à 18:53, Scott Cheloha a écrit :

At memory hot-remove time we can retrieve an LMB's nid from its
corresponding memory_block.  There is no need to store the nid
in multiple locations.

Note that lmb_to_memblock() uses find_memory_block() to get the
corresponding memory_block.  As find_memory_block() runs in sub-linear
time this approach is negligibly slower than what we do at present.

In exchange for this lookup at hot-remove time we no longer need to
call memory_add_physaddr_to_nid() during drmem_init() for each LMB.
On powerpc, memory_add_physaddr_to_nid() is a linear search, so this
spares us an O(n^2) initialization during boot.

On systems with many LMBs that initialization overhead is palpable and
disruptive.  For example, on a box with 249854 LMBs we're seeing
drmem_init() take upwards of 30 seconds to complete:

[   53.721639] drmem: initializing drmem v2
[   80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! [swapper/0:1]
[   80.604377] Modules linked in:
[   80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4
[   80.604397] NIP:  c00a4980 LR: c00a4940 CTR: 
[   80.604407] REGS: c0002dbff8493830 TRAP: 0901   Not tainted  (5.6.0-rc2+)
[   80.604412] MSR:  82009033   CR: 44000248  
XER: 000d
[   80.604431] CFAR: c00a4a38 IRQMASK: 0
[   80.604431] GPR00: c00a4940 c0002dbff8493ac0 c1904400 
c0003cfede30
[   80.604431] GPR04:  c0f4095a 002f 
1000
[   80.604431] GPR08: cbf7ecdb7fb8 cbf7ecc2d3c8 0008 
c00c0002fdfb2001
[   80.604431] GPR12:  c0001e8ec200
[   80.604477] NIP [c00a4980] hot_add_scn_to_nid+0xa0/0x3e0
[   80.604486] LR [c00a4940] hot_add_scn_to_nid+0x60/0x3e0
[   80.604492] Call Trace:
[   80.604498] [c0002dbff8493ac0] [c00a4940] 
hot_add_scn_to_nid+0x60/0x3e0 (unreliable)
[   80.604509] [c0002dbff8493b20] [c0087c10] 
memory_add_physaddr_to_nid+0x20/0x60
[   80.604521] [c0002dbff8493b40] [c10d4880] drmem_init+0x25c/0x2f0
[   80.604530] [c0002dbff8493c10] [c0010154] do_one_initcall+0x64/0x2c0
[   80.604540] [c0002dbff8493ce0] [c10c4aa0] 
kernel_init_freeable+0x2d8/0x3a0
[   80.604550] [c0002dbff8493db0] [c0010824] kernel_init+0x2c/0x148
[   80.604560] [c0002dbff8493e20] [c000b648] 
ret_from_kernel_thread+0x5c/0x74
[   80.604567] Instruction dump:
[   80.604574] 392918e8 e949 e90a000a e92a 80ea000c 1d080018 3908ffe8 
7d094214
[   80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac e949 
7fbe5040
[   89.047390] drmem: 249854 LMB(s)

With a patched kernel on the same machine we're no longer seeing the
soft lockup.  drmem_init() now completes in negligible time, even when
the LMB count is large.

Signed-off-by: Scott Cheloha 
---
  arch/powerpc/include/asm/drmem.h  | 21 ---
  arch/powerpc/mm/drmem.c   |  6 +-
  .../platforms/pseries/hotplug-memory.c| 19 ++---
  3 files changed, 13 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 414d209f45bb..34e4e9b257f5 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -13,9 +13,6 @@ struct drmem_lmb {
u32 drc_index;
u32 aa_index;
u32 flags;
-#ifdef CONFIG_MEMORY_HOTPLUG
-   int nid;
-#endif
  };

  struct drmem_lmb_info {
@@ -104,22 +101,4 @@ static inline void 
invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
lmb->aa_index = 0x;
  }

-#ifdef CONFIG_MEMORY_HOTPLUG
-static inline void lmb_set_nid(struct drmem_lmb *lmb)
-{
-   lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
-}
-static inline void lmb_clear_nid(struct drmem_lmb *lmb)
-{
-   lmb->nid = -1;
-}
-#else
-static inline void lmb_set_nid(struct drmem_lmb *lmb)
-{
-}
-static inline void lmb_clear_nid(struct drmem_lmb *lmb)
-{
-}
-#endif
-
  #endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 59327cefbc6a..873fcfc7b875 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -362,10 +362,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
if (!drmem_info->lmbs)
return;

-   for_each_drmem_lmb(lmb) {
+   for_each_drmem_lmb(lmb)
read_drconf_v1_cell(lmb, &prop);
-   lmb_set_nid(lmb);
-   }
  }

  static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -410,8 +408,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)

lmb->aa_index = dr_cell.aa_index;
lmb->flags = dr_cell.flags;
-
-   lmb_set_nid(lmb);
}
}
  }
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotp

Re: [PATCH v2] pseries/drmem: don't cache node id in drmem_lmb struct

2020-07-28 Thread David Hildenbrand
On 28.07.20 18:53, Scott Cheloha wrote:
> At memory hot-remove time we can retrieve an LMB's nid from its
> corresponding memory_block.  There is no need to store the nid
> in multiple locations.
> 
> Note that lmb_to_memblock() uses find_memory_block() to get the
> corresponding memory_block.  As find_memory_block() runs in sub-linear
> time this approach is negligibly slower than what we do at present.
> 
> In exchange for this lookup at hot-remove time we no longer need to
> call memory_add_physaddr_to_nid() during drmem_init() for each LMB.
> On powerpc, memory_add_physaddr_to_nid() is a linear search, so this
> spares us an O(n^2) initialization during boot.
> 
> On systems with many LMBs that initialization overhead is palpable and
> disruptive.  For example, on a box with 249854 LMBs we're seeing
> drmem_init() take upwards of 30 seconds to complete:
> 
> [   53.721639] drmem: initializing drmem v2
> [   80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! 
> [swapper/0:1]
> [   80.604377] Modules linked in:
> [   80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4
> [   80.604397] NIP:  c00a4980 LR: c00a4940 CTR: 
> 
> [   80.604407] REGS: c0002dbff8493830 TRAP: 0901   Not tainted  (5.6.0-rc2+)
> [   80.604412] MSR:  82009033   CR: 
> 44000248  XER: 000d
> [   80.604431] CFAR: c00a4a38 IRQMASK: 0
> [   80.604431] GPR00: c00a4940 c0002dbff8493ac0 c1904400 
> c0003cfede30
> [   80.604431] GPR04:  c0f4095a 002f 
> 1000
> [   80.604431] GPR08: cbf7ecdb7fb8 cbf7ecc2d3c8 0008 
> c00c0002fdfb2001
> [   80.604431] GPR12:  c0001e8ec200
> [   80.604477] NIP [c00a4980] hot_add_scn_to_nid+0xa0/0x3e0
> [   80.604486] LR [c00a4940] hot_add_scn_to_nid+0x60/0x3e0
> [   80.604492] Call Trace:
> [   80.604498] [c0002dbff8493ac0] [c00a4940] 
> hot_add_scn_to_nid+0x60/0x3e0 (unreliable)
> [   80.604509] [c0002dbff8493b20] [c0087c10] 
> memory_add_physaddr_to_nid+0x20/0x60
> [   80.604521] [c0002dbff8493b40] [c10d4880] drmem_init+0x25c/0x2f0
> [   80.604530] [c0002dbff8493c10] [c0010154] 
> do_one_initcall+0x64/0x2c0
> [   80.604540] [c0002dbff8493ce0] [c10c4aa0] 
> kernel_init_freeable+0x2d8/0x3a0
> [   80.604550] [c0002dbff8493db0] [c0010824] kernel_init+0x2c/0x148
> [   80.604560] [c0002dbff8493e20] [c000b648] 
> ret_from_kernel_thread+0x5c/0x74
> [   80.604567] Instruction dump:
> [   80.604574] 392918e8 e949 e90a000a e92a 80ea000c 1d080018 3908ffe8 
> 7d094214
> [   80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac 
> e949 7fbe5040
> [   89.047390] drmem: 249854 LMB(s)
> 
> With a patched kernel on the same machine we're no longer seeing the
> soft lockup.  drmem_init() now completes in negligible time, even when
> the LMB count is large.
> 

Yeah, as long as you remove_memory() in memory block granularity, this
is guaranteed to work.

Reviewed-by: David Hildenbrand 


-- 
Thanks,

David / dhildenb



[PATCH v2] pseries/drmem: don't cache node id in drmem_lmb struct

2020-07-28 Thread Scott Cheloha
At memory hot-remove time we can retrieve an LMB's nid from its
corresponding memory_block.  There is no need to store the nid
in multiple locations.

Note that lmb_to_memblock() uses find_memory_block() to get the
corresponding memory_block.  As find_memory_block() runs in sub-linear
time this approach is negligibly slower than what we do at present.

In exchange for this lookup at hot-remove time we no longer need to
call memory_add_physaddr_to_nid() during drmem_init() for each LMB.
On powerpc, memory_add_physaddr_to_nid() is a linear search, so this
spares us an O(n^2) initialization during boot.

On systems with many LMBs that initialization overhead is palpable and
disruptive.  For example, on a box with 249854 LMBs we're seeing
drmem_init() take upwards of 30 seconds to complete:

[   53.721639] drmem: initializing drmem v2
[   80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! [swapper/0:1]
[   80.604377] Modules linked in:
[   80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4
[   80.604397] NIP:  c00a4980 LR: c00a4940 CTR: 
[   80.604407] REGS: c0002dbff8493830 TRAP: 0901   Not tainted  (5.6.0-rc2+)
[   80.604412] MSR:  82009033   CR: 44000248  
XER: 000d
[   80.604431] CFAR: c00a4a38 IRQMASK: 0
[   80.604431] GPR00: c00a4940 c0002dbff8493ac0 c1904400 
c0003cfede30
[   80.604431] GPR04:  c0f4095a 002f 
1000
[   80.604431] GPR08: cbf7ecdb7fb8 cbf7ecc2d3c8 0008 
c00c0002fdfb2001
[   80.604431] GPR12:  c0001e8ec200
[   80.604477] NIP [c00a4980] hot_add_scn_to_nid+0xa0/0x3e0
[   80.604486] LR [c00a4940] hot_add_scn_to_nid+0x60/0x3e0
[   80.604492] Call Trace:
[   80.604498] [c0002dbff8493ac0] [c00a4940] 
hot_add_scn_to_nid+0x60/0x3e0 (unreliable)
[   80.604509] [c0002dbff8493b20] [c0087c10] 
memory_add_physaddr_to_nid+0x20/0x60
[   80.604521] [c0002dbff8493b40] [c10d4880] drmem_init+0x25c/0x2f0
[   80.604530] [c0002dbff8493c10] [c0010154] do_one_initcall+0x64/0x2c0
[   80.604540] [c0002dbff8493ce0] [c10c4aa0] 
kernel_init_freeable+0x2d8/0x3a0
[   80.604550] [c0002dbff8493db0] [c0010824] kernel_init+0x2c/0x148
[   80.604560] [c0002dbff8493e20] [c000b648] 
ret_from_kernel_thread+0x5c/0x74
[   80.604567] Instruction dump:
[   80.604574] 392918e8 e949 e90a000a e92a 80ea000c 1d080018 3908ffe8 
7d094214
[   80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac e949 
7fbe5040
[   89.047390] drmem: 249854 LMB(s)

With a patched kernel on the same machine we're no longer seeing the
soft lockup.  drmem_init() now completes in negligible time, even when
the LMB count is large.

Signed-off-by: Scott Cheloha 
---
 arch/powerpc/include/asm/drmem.h  | 21 ---
 arch/powerpc/mm/drmem.c   |  6 +-
 .../platforms/pseries/hotplug-memory.c| 19 ++---
 3 files changed, 13 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 414d209f45bb..34e4e9b257f5 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -13,9 +13,6 @@ struct drmem_lmb {
u32 drc_index;
u32 aa_index;
u32 flags;
-#ifdef CONFIG_MEMORY_HOTPLUG
-   int nid;
-#endif
 };
 
 struct drmem_lmb_info {
@@ -104,22 +101,4 @@ static inline void 
invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
lmb->aa_index = 0x;
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-static inline void lmb_set_nid(struct drmem_lmb *lmb)
-{
-   lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
-}
-static inline void lmb_clear_nid(struct drmem_lmb *lmb)
-{
-   lmb->nid = -1;
-}
-#else
-static inline void lmb_set_nid(struct drmem_lmb *lmb)
-{
-}
-static inline void lmb_clear_nid(struct drmem_lmb *lmb)
-{
-}
-#endif
-
 #endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 59327cefbc6a..873fcfc7b875 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -362,10 +362,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
if (!drmem_info->lmbs)
return;
 
-   for_each_drmem_lmb(lmb) {
+   for_each_drmem_lmb(lmb)
read_drconf_v1_cell(lmb, &prop);
-   lmb_set_nid(lmb);
-   }
 }
 
 static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -410,8 +408,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
 
lmb->aa_index = dr_cell.aa_index;
lmb->flags = dr_cell.flags;
-
-   lmb_set_nid(lmb);
}
}
 }
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 5ace2f9a277e..7bf66fdcf916 100644
--- a/arch/po