Re: [PATCH 6/9] mm, page_alloc: cache pageset high and batch in struct zone
On 10/5/20 3:28 PM, Michal Hocko wrote: On Tue 22-09-20 16:37:09, Vlastimil Babka wrote: All per-cpu pagesets for a zone use the same high and batch values, that are duplicated there just for performance (locality) reasons. This patch adds the same variables also to struct zone as a shared copy. This will be useful later for making possible to disable pcplists temporarily by setting high value to 0, while remembering the values for restoring them later. But we can also immediately benefit from not updating pagesets of all possible cpus in case the newly recalculated values (after sysctl change or memory online/offline) are actually unchanged from the previous ones. Advantage of this patch is not really clear from it in isolation. Maybe merge it with the patch which uses the duplicated state. I'm not sure that would help its reviewability? As the patch that uses it is the last, largest one. And there is already a small advantage right away as changelog explains.
Re: [PATCH 6/9] mm, page_alloc: cache pageset high and batch in struct zone
On 9/25/20 12:34 PM, David Hildenbrand wrote: On 22.09.20 16:37, Vlastimil Babka wrote: @@ -6300,6 +6310,8 @@ static __meminit void zone_pcp_init(struct zone *zone) * offset of a (static) per cpu variable into the per cpu area. */ zone->pageset = &boot_pageset; + zone->pageset_high = BOOT_PAGESET_HIGH; + zone->pageset_batch = BOOT_PAGESET_BATCH; I do wonder if copying from any cpuvar inside boot_pageset is cleaner. zone->pageset_high = &this_cpu_ptr(zone->pageset)->pcp.high; Uh I don't know. That would be like admitting they can be different than what was initialized. But then they could be also different depending on what cpu we happen to run it on. It's why I added the #define BOOT_PAGESET_* in the first place - to ensure same value used in two places. Makes sense?
Re: [PATCH 6/9] mm, page_alloc: cache pageset high and batch in struct zone
On Tue 22-09-20 16:37:09, Vlastimil Babka wrote: > All per-cpu pagesets for a zone use the same high and batch values, that are > duplicated there just for performance (locality) reasons. This patch adds the > same variables also to struct zone as a shared copy. > > This will be useful later for making possible to disable pcplists temporarily > by setting high value to 0, while remembering the values for restoring them > later. But we can also immediately benefit from not updating pagesets of all > possible cpus in case the newly recalculated values (after sysctl change or > memory online/offline) are actually unchanged from the previous ones. Advantage of this patch is not really clear from it in isolation. Maybe merge it with the patch which uses the duplicated state. > > Signed-off-by: Vlastimil Babka > --- > include/linux/mmzone.h | 6 ++ > mm/page_alloc.c| 16 ++-- > 2 files changed, 20 insertions(+), 2 deletions(-) > > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h > index 90721f3156bc..7ad3f14dbe88 100644 > --- a/include/linux/mmzone.h > +++ b/include/linux/mmzone.h > @@ -470,6 +470,12 @@ struct zone { > #endif > struct pglist_data *zone_pgdat; > struct per_cpu_pageset __percpu *pageset; > + /* > + * the high and batch values are copied to individual pagesets for > + * faster access > + */ > + int pageset_high; > + int pageset_batch; > > #ifndef CONFIG_SPARSEMEM > /* > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index de3b48bda45c..901907799bdc 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -5824,6 +5824,8 @@ static void build_zonelists(pg_data_t *pgdat) > * Other parts of the kernel may not check if the zone is available. 
> */ > static void pageset_init(struct per_cpu_pageset *p); > +#define BOOT_PAGESET_HIGH 0 > +#define BOOT_PAGESET_BATCH 1 > static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); > static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); > > @@ -6213,8 +6215,8 @@ static void pageset_init(struct per_cpu_pageset *p) >* need to be as careful as pageset_update() as nobody can access the >* pageset yet. >*/ > - pcp->high = 0; > - pcp->batch = 1; > + pcp->high = BOOT_PAGESET_HIGH; > + pcp->batch = BOOT_PAGESET_BATCH; > } > > /* > @@ -6238,6 +6240,14 @@ static void zone_set_pageset_high_and_batch(struct > zone *zone) > new_batch = max(1UL, 1 * new_batch); > } > > + if (zone->pageset_high != new_high || > + zone->pageset_batch != new_batch) { > + zone->pageset_high = new_high; > + zone->pageset_batch = new_batch; > + } else { > + return; > + } > + > for_each_possible_cpu(cpu) { > p = per_cpu_ptr(zone->pageset, cpu); > pageset_update(&p->pcp, new_high, new_batch); > @@ -6300,6 +6310,8 @@ static __meminit void zone_pcp_init(struct zone *zone) >* offset of a (static) per cpu variable into the per cpu area. >*/ > zone->pageset = &boot_pageset; > + zone->pageset_high = BOOT_PAGESET_HIGH; > + zone->pageset_batch = BOOT_PAGESET_BATCH; > > if (populated_zone(zone)) > printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n", > -- > 2.28.0 -- Michal Hocko SUSE Labs
Re: [PATCH 6/9] mm, page_alloc: cache pageset high and batch in struct zone
On 22.09.20 16:37, Vlastimil Babka wrote: > All per-cpu pagesets for a zone use the same high and batch values, that are > duplicated there just for performance (locality) reasons. This patch adds the > same variables also to struct zone as a shared copy. > > This will be useful later for making possible to disable pcplists temporarily > by setting high value to 0, while remembering the values for restoring them > later. But we can also immediately benefit from not updating pagesets of all > possible cpus in case the newly recalculated values (after sysctl change or > memory online/offline) are actually unchanged from the previous ones. > > Signed-off-by: Vlastimil Babka > --- > include/linux/mmzone.h | 6 ++ > mm/page_alloc.c | 16 ++-- > 2 files changed, 20 insertions(+), 2 deletions(-) > > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h > index 90721f3156bc..7ad3f14dbe88 100644 > --- a/include/linux/mmzone.h > +++ b/include/linux/mmzone.h > @@ -470,6 +470,12 @@ struct zone { > #endif > struct pglist_data *zone_pgdat; > struct per_cpu_pageset __percpu *pageset; > + /* > + * the high and batch values are copied to individual pagesets for > + * faster access > + */ > + int pageset_high; > + int pageset_batch; > > #ifndef CONFIG_SPARSEMEM > /* > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index de3b48bda45c..901907799bdc 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -5824,6 +5824,8 @@ static void build_zonelists(pg_data_t *pgdat) > * Other parts of the kernel may not check if the zone is available. > */ > static void pageset_init(struct per_cpu_pageset *p); > +#define BOOT_PAGESET_HIGH 0 > +#define BOOT_PAGESET_BATCH 1 Much better. 
A comment would have been nice ("this disables the pcp via the boot pageset completely.") :) (I'm pretty sure I'd forget at one point what these values mean) > static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); > static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); > > @@ -6213,8 +6215,8 @@ static void pageset_init(struct per_cpu_pageset *p) >* need to be as careful as pageset_update() as nobody can access the >* pageset yet. >*/ > - pcp->high = 0; > - pcp->batch = 1; > + pcp->high = BOOT_PAGESET_HIGH; > + pcp->batch = BOOT_PAGESET_BATCH; > } > > /* > @@ -6238,6 +6240,14 @@ static void zone_set_pageset_high_and_batch(struct > zone *zone) > new_batch = max(1UL, 1 * new_batch); > } > > + if (zone->pageset_high != new_high || > + zone->pageset_batch != new_batch) { > + zone->pageset_high = new_high; > + zone->pageset_batch = new_batch; > + } else { > + return; > + } > + > for_each_possible_cpu(cpu) { > p = per_cpu_ptr(zone->pageset, cpu); > pageset_update(&p->pcp, new_high, new_batch); > @@ -6300,6 +6310,8 @@ static __meminit void zone_pcp_init(struct zone *zone) >* offset of a (static) per cpu variable into the per cpu area. >*/ > zone->pageset = &boot_pageset; > + zone->pageset_high = BOOT_PAGESET_HIGH; > + zone->pageset_batch = BOOT_PAGESET_BATCH; I do wonder if copying from any cpuvar inside boot_pageset is cleaner. zone->pageset_high = &this_cpu_ptr(zone->pageset)->pcp.high; ... -- Thanks, David / dhildenb
[PATCH 6/9] mm, page_alloc: cache pageset high and batch in struct zone
All per-cpu pagesets for a zone use the same high and batch values, that are duplicated there just for performance (locality) reasons. This patch adds the same variables also to struct zone as a shared copy. This will be useful later for making possible to disable pcplists temporarily by setting high value to 0, while remembering the values for restoring them later. But we can also immediately benefit from not updating pagesets of all possible cpus in case the newly recalculated values (after sysctl change or memory online/offline) are actually unchanged from the previous ones. Signed-off-by: Vlastimil Babka --- include/linux/mmzone.h | 6 ++ mm/page_alloc.c| 16 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 90721f3156bc..7ad3f14dbe88 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -470,6 +470,12 @@ struct zone { #endif struct pglist_data *zone_pgdat; struct per_cpu_pageset __percpu *pageset; + /* +* the high and batch values are copied to individual pagesets for +* faster access +*/ + int pageset_high; + int pageset_batch; #ifndef CONFIG_SPARSEMEM /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index de3b48bda45c..901907799bdc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5824,6 +5824,8 @@ static void build_zonelists(pg_data_t *pgdat) * Other parts of the kernel may not check if the zone is available. */ static void pageset_init(struct per_cpu_pageset *p); +#define BOOT_PAGESET_HIGH 0 +#define BOOT_PAGESET_BATCH 1 static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); @@ -6213,8 +6215,8 @@ static void pageset_init(struct per_cpu_pageset *p) * need to be as careful as pageset_update() as nobody can access the * pageset yet. 
*/ - pcp->high = 0; - pcp->batch = 1; + pcp->high = BOOT_PAGESET_HIGH; + pcp->batch = BOOT_PAGESET_BATCH; } /* @@ -6238,6 +6240,14 @@ static void zone_set_pageset_high_and_batch(struct zone *zone) new_batch = max(1UL, 1 * new_batch); } + if (zone->pageset_high != new_high || + zone->pageset_batch != new_batch) { + zone->pageset_high = new_high; + zone->pageset_batch = new_batch; + } else { + return; + } + for_each_possible_cpu(cpu) { p = per_cpu_ptr(zone->pageset, cpu); pageset_update(&p->pcp, new_high, new_batch); @@ -6300,6 +6310,8 @@ static __meminit void zone_pcp_init(struct zone *zone) * offset of a (static) per cpu variable into the per cpu area. */ zone->pageset = &boot_pageset; + zone->pageset_high = BOOT_PAGESET_HIGH; + zone->pageset_batch = BOOT_PAGESET_BATCH; if (populated_zone(zone)) printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n", -- 2.28.0