Re: [PATCH v2] pseries/drmem: don't cache node id in drmem_lmb struct
Hi Scott, Le 28/07/2020 à 18:53, Scott Cheloha a écrit : At memory hot-remove time we can retrieve an LMB's nid from its corresponding memory_block. There is no need to store the nid in multiple locations. Note that lmb_to_memblock() uses find_memory_block() to get the corresponding memory_block. As find_memory_block() runs in sub-linear time this approach is negligibly slower than what we do at present. In exchange for this lookup at hot-remove time we no longer need to call memory_add_physaddr_to_nid() during drmem_init() for each LMB. On powerpc, memory_add_physaddr_to_nid() is a linear search, so this spares us an O(n^2) initialization during boot. On systems with many LMBs that initialization overhead is palpable and disruptive. For example, on a box with 249854 LMBs we're seeing drmem_init() take upwards of 30 seconds to complete: [ 53.721639] drmem: initializing drmem v2 [ 80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! [swapper/0:1] [ 80.604377] Modules linked in: [ 80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4 [ 80.604397] NIP: c00a4980 LR: c00a4940 CTR: [ 80.604407] REGS: c0002dbff8493830 TRAP: 0901 Not tainted (5.6.0-rc2+) [ 80.604412] MSR: 82009033 CR: 44000248 XER: 000d [ 80.604431] CFAR: c00a4a38 IRQMASK: 0 [ 80.604431] GPR00: c00a4940 c0002dbff8493ac0 c1904400 c0003cfede30 [ 80.604431] GPR04: c0f4095a 002f 1000 [ 80.604431] GPR08: cbf7ecdb7fb8 cbf7ecc2d3c8 0008 c00c0002fdfb2001 [ 80.604431] GPR12: c0001e8ec200 [ 80.604477] NIP [c00a4980] hot_add_scn_to_nid+0xa0/0x3e0 [ 80.604486] LR [c00a4940] hot_add_scn_to_nid+0x60/0x3e0 [ 80.604492] Call Trace: [ 80.604498] [c0002dbff8493ac0] [c00a4940] hot_add_scn_to_nid+0x60/0x3e0 (unreliable) [ 80.604509] [c0002dbff8493b20] [c0087c10] memory_add_physaddr_to_nid+0x20/0x60 [ 80.604521] [c0002dbff8493b40] [c10d4880] drmem_init+0x25c/0x2f0 [ 80.604530] [c0002dbff8493c10] [c0010154] do_one_initcall+0x64/0x2c0 [ 80.604540] [c0002dbff8493ce0] [c10c4aa0] kernel_init_freeable+0x2d8/0x3a0 [ 80.604550] [c0002dbff8493db0] [c0010824] kernel_init+0x2c/0x148 [ 80.604560] [c0002dbff8493e20] [c000b648] ret_from_kernel_thread+0x5c/0x74 [ 80.604567] Instruction dump: [ 80.604574] 392918e8 e949 e90a000a e92a 80ea000c 1d080018 3908ffe8 7d094214 [ 80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac e949 7fbe5040 [ 89.047390] drmem: 249854 LMB(s) With a patched kernel on the same machine we're no longer seeing the soft lockup. drmem_init() now completes in negligible time, even when the LMB count is large. Signed-off-by: Scott Cheloha --- arch/powerpc/include/asm/drmem.h | 21 --- arch/powerpc/mm/drmem.c | 6 +- .../platforms/pseries/hotplug-memory.c| 19 ++--- 3 files changed, 13 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 414d209f45bb..34e4e9b257f5 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -13,9 +13,6 @@ struct drmem_lmb { u32 drc_index; u32 aa_index; u32 flags; -#ifdef CONFIG_MEMORY_HOTPLUG - int nid; -#endif }; struct drmem_lmb_info { @@ -104,22 +101,4 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0x; } -#ifdef CONFIG_MEMORY_HOTPLUG -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ - lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ - lmb->nid = -1; -} -#else -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ -} -#endif - #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 59327cefbc6a..873fcfc7b875 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -362,10 +362,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) { + for_each_drmem_lmb(lmb) read_drconf_v1_cell(lmb, &prop); - lmb_set_nid(lmb); - } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -410,8 +408,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; - - lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotp
Re: [PATCH v2] pseries/drmem: don't cache node id in drmem_lmb struct
On 28.07.20 18:53, Scott Cheloha wrote: > At memory hot-remove time we can retrieve an LMB's nid from its > corresponding memory_block. There is no need to store the nid > in multiple locations. > > Note that lmb_to_memblock() uses find_memory_block() to get the > corresponding memory_block. As find_memory_block() runs in sub-linear > time this approach is negligibly slower than what we do at present. > > In exchange for this lookup at hot-remove time we no longer need to > call memory_add_physaddr_to_nid() during drmem_init() for each LMB. > On powerpc, memory_add_physaddr_to_nid() is a linear search, so this > spares us an O(n^2) initialization during boot. > > On systems with many LMBs that initialization overhead is palpable and > disruptive. For example, on a box with 249854 LMBs we're seeing > drmem_init() take upwards of 30 seconds to complete: > > [ 53.721639] drmem: initializing drmem v2 > [ 80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! > [swapper/0:1] > [ 80.604377] Modules linked in: > [ 80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4 > [ 80.604397] NIP: c00a4980 LR: c00a4940 CTR: > > [ 80.604407] REGS: c0002dbff8493830 TRAP: 0901 Not tainted (5.6.0-rc2+) > [ 80.604412] MSR: 82009033 CR: > 44000248 XER: 000d > [ 80.604431] CFAR: c00a4a38 IRQMASK: 0 > [ 80.604431] GPR00: c00a4940 c0002dbff8493ac0 c1904400 > c0003cfede30 > [ 80.604431] GPR04: c0f4095a 002f > 1000 > [ 80.604431] GPR08: cbf7ecdb7fb8 cbf7ecc2d3c8 0008 > c00c0002fdfb2001 > [ 80.604431] GPR12: c0001e8ec200 > [ 80.604477] NIP [c00a4980] hot_add_scn_to_nid+0xa0/0x3e0 > [ 80.604486] LR [c00a4940] hot_add_scn_to_nid+0x60/0x3e0 > [ 80.604492] Call Trace: > [ 80.604498] [c0002dbff8493ac0] [c00a4940] > hot_add_scn_to_nid+0x60/0x3e0 (unreliable) > [ 80.604509] [c0002dbff8493b20] [c0087c10] > memory_add_physaddr_to_nid+0x20/0x60 > [ 80.604521] [c0002dbff8493b40] [c10d4880] drmem_init+0x25c/0x2f0 > [ 80.604530] [c0002dbff8493c10] [c0010154] > do_one_initcall+0x64/0x2c0 > [ 80.604540] [c0002dbff8493ce0] [c10c4aa0] > kernel_init_freeable+0x2d8/0x3a0 > [ 80.604550] [c0002dbff8493db0] [c0010824] kernel_init+0x2c/0x148 > [ 80.604560] [c0002dbff8493e20] [c000b648] > ret_from_kernel_thread+0x5c/0x74 > [ 80.604567] Instruction dump: > [ 80.604574] 392918e8 e949 e90a000a e92a 80ea000c 1d080018 3908ffe8 > 7d094214 > [ 80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac > e949 7fbe5040 > [ 89.047390] drmem: 249854 LMB(s) > > With a patched kernel on the same machine we're no longer seeing the > soft lockup. drmem_init() now completes in negligible time, even when > the LMB count is large. > Yeah, as long as you remove_memory() in memory block granularity, this is guaranteed to work. Reviewed-by: David Hildenbrand -- Thanks, David / dhildenb
[PATCH v2] pseries/drmem: don't cache node id in drmem_lmb struct
At memory hot-remove time we can retrieve an LMB's nid from its corresponding memory_block. There is no need to store the nid in multiple locations. Note that lmb_to_memblock() uses find_memory_block() to get the corresponding memory_block. As find_memory_block() runs in sub-linear time this approach is negligibly slower than what we do at present. In exchange for this lookup at hot-remove time we no longer need to call memory_add_physaddr_to_nid() during drmem_init() for each LMB. On powerpc, memory_add_physaddr_to_nid() is a linear search, so this spares us an O(n^2) initialization during boot. On systems with many LMBs that initialization overhead is palpable and disruptive. For example, on a box with 249854 LMBs we're seeing drmem_init() take upwards of 30 seconds to complete: [ 53.721639] drmem: initializing drmem v2 [ 80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! [swapper/0:1] [ 80.604377] Modules linked in: [ 80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4 [ 80.604397] NIP: c00a4980 LR: c00a4940 CTR: [ 80.604407] REGS: c0002dbff8493830 TRAP: 0901 Not tainted (5.6.0-rc2+) [ 80.604412] MSR: 82009033 CR: 44000248 XER: 000d [ 80.604431] CFAR: c00a4a38 IRQMASK: 0 [ 80.604431] GPR00: c00a4940 c0002dbff8493ac0 c1904400 c0003cfede30 [ 80.604431] GPR04: c0f4095a 002f 1000 [ 80.604431] GPR08: cbf7ecdb7fb8 cbf7ecc2d3c8 0008 c00c0002fdfb2001 [ 80.604431] GPR12: c0001e8ec200 [ 80.604477] NIP [c00a4980] hot_add_scn_to_nid+0xa0/0x3e0 [ 80.604486] LR [c00a4940] hot_add_scn_to_nid+0x60/0x3e0 [ 80.604492] Call Trace: [ 80.604498] [c0002dbff8493ac0] [c00a4940] hot_add_scn_to_nid+0x60/0x3e0 (unreliable) [ 80.604509] [c0002dbff8493b20] [c0087c10] memory_add_physaddr_to_nid+0x20/0x60 [ 80.604521] [c0002dbff8493b40] [c10d4880] drmem_init+0x25c/0x2f0 [ 80.604530] [c0002dbff8493c10] [c0010154] do_one_initcall+0x64/0x2c0 [ 80.604540] [c0002dbff8493ce0] [c10c4aa0] kernel_init_freeable+0x2d8/0x3a0 [ 80.604550] [c0002dbff8493db0] [c0010824] kernel_init+0x2c/0x148 [ 80.604560] [c0002dbff8493e20] [c000b648] ret_from_kernel_thread+0x5c/0x74 [ 80.604567] Instruction dump: [ 80.604574] 392918e8 e949 e90a000a e92a 80ea000c 1d080018 3908ffe8 7d094214 [ 80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac e949 7fbe5040 [ 89.047390] drmem: 249854 LMB(s) With a patched kernel on the same machine we're no longer seeing the soft lockup. drmem_init() now completes in negligible time, even when the LMB count is large. Signed-off-by: Scott Cheloha --- arch/powerpc/include/asm/drmem.h | 21 --- arch/powerpc/mm/drmem.c | 6 +- .../platforms/pseries/hotplug-memory.c| 19 ++--- 3 files changed, 13 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 414d209f45bb..34e4e9b257f5 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -13,9 +13,6 @@ struct drmem_lmb { u32 drc_index; u32 aa_index; u32 flags; -#ifdef CONFIG_MEMORY_HOTPLUG - int nid; -#endif }; struct drmem_lmb_info { @@ -104,22 +101,4 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0x; } -#ifdef CONFIG_MEMORY_HOTPLUG -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ - lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ - lmb->nid = -1; -} -#else -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ -} -#endif - #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 59327cefbc6a..873fcfc7b875 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -362,10 +362,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) { + for_each_drmem_lmb(lmb) read_drconf_v1_cell(lmb, &prop); - lmb_set_nid(lmb); - } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -410,8 +408,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; - - lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 5ace2f9a277e..7bf66fdcf916 100644 --- a/arch/po