On 6/7/21 8:32 PM, Dan Williams wrote:
> On Mon, Jun 7, 2021 at 6:49 AM Joao Martins <[email protected]> wrote:
> [..]
>>> Given all of the above I'm wondering if there should be a new
>>> "compound specific" flavor of this routine rather than trying to
>>> cleverly inter mingle the old path with the new. This is easier
>>> because you have already done the work in previous patches to break
>>> this into helpers. So just have memmap_init_zone_device() do it the
>>> "old" way and memmap_init_compound() handle all the tail page init +
>>> optimizations.
>>>
>> I can separate it out; it should be easier to follow.
>>
>> Albeit just a note, I think memmap_init_compound() should be the new normal,
>> as its metadata more accurately represents what goes into the page tables.
>> That's regardless of vmemmap-based gains, and hence why my train of thought
>> was to not separate it.
>>
>> After this series, all devmap pages where @geometry matches @align will use
>> compound pages instead, and we enable that in device-dax as the first user
>> (in the next patch). Whether or not there is an altmap so far only affects
>> the uniqueness of the struct pages: with an altmap we don't reuse the base
>> pages that contain only tail pages, and consequently we have to initialize
>> all tail struct pages.
>
> I think combining the cases into a common central routine makes the
> code that much harder to maintain. A small duplication cost is worth
> it in my opinion to help readability / maintainability.
>
I am addressing this comment and taking a step back. Just moving the tail page
init to memmap_init_compound() makes this a lot more readable. Albeit now I
think that having separate top-level loops over pfns doesn't bring much
improvement there.

Here's what I have with just the tail init moved into a separate routine; see
your original suggestion after the scissors mark. I have a slight inclination
towards the first one, but no really strong preference. Thoughts?
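
(For reference, both variants below assume the geometry helpers added earlier
in the series; modulo exact naming, they amount to something like:

/* Geometry of the devmap in bytes; PAGE_SIZE means no compound pages. */
static inline unsigned long pgmap_geometry(struct dev_pagemap *pgmap)
{
	if (!pgmap || !pgmap->geometry)
		return PAGE_SIZE;
	return pgmap->geometry;
}

/* Same geometry, expressed as the number of pfns per compound page. */
static inline unsigned long pgmap_pfn_geometry(struct dev_pagemap *pgmap)
{
	return PHYS_PFN(pgmap_geometry(pgmap));
}

so pfns_per_compound == 1 below is the "no geometry", i.e. base pages, case.)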
[...]
static void __ref memmap_init_compound(struct page *page, unsigned long pfn,
				       unsigned long zone_idx, int nid,
				       struct dev_pagemap *pgmap,
				       unsigned long nr_pages)
{
	unsigned int order_align = order_base_2(nr_pages);
	unsigned long i;

	__SetPageHead(page);

	for (i = 1; i < nr_pages; i++) {
		__init_zone_device_page(page + i, pfn + i, zone_idx,
					nid, pgmap);
		prep_compound_tail(page, i);

		/*
		 * The first and second tail pages need to be
		 * initialized first, hence the head page is
		 * prepared last.
		 */
		if (i == 2)
			prep_compound_head(page, order_align);
	}
}

void __ref memmap_init_zone_device(struct zone *zone,
				   unsigned long start_pfn,
				   unsigned long nr_pages,
				   struct dev_pagemap *pgmap)
{
	unsigned long pfn, end_pfn = start_pfn + nr_pages;
	struct pglist_data *pgdat = zone->zone_pgdat;
	struct vmem_altmap *altmap = pgmap_altmap(pgmap);
	unsigned long pfns_per_compound = pgmap_pfn_geometry(pgmap);
	unsigned long zone_idx = zone_idx(zone);
	unsigned long start = jiffies;
	int nid = pgdat->node_id;

	if (WARN_ON_ONCE(!pgmap || zone_idx != ZONE_DEVICE))
		return;

	/*
	 * The call to memmap_init_zone should have already taken care
	 * of the pages reserved for the memmap, so we can just jump to
	 * the end of that region and start processing the device pages.
	 */
	if (altmap) {
		start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
		nr_pages = end_pfn - start_pfn;
	}

	for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) {
		struct page *page = pfn_to_page(pfn);

		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);

		if (pfns_per_compound == 1)
			continue;

		memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
				     pfns_per_compound);
	}

	pr_info("%s initialised %lu pages in %ums\n", __func__,
		nr_pages, jiffies_to_msecs(jiffies - start));
}
[...]
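
(Context for the ordering comment above: prep_compound_tail(), from an earlier
patch in this series, is roughly:

static void prep_compound_tail(struct page *head, int tail_idx)
{
	struct page *p = head + tail_idx;

	p->mapping = TAIL_MAPPING;
	set_compound_head(p, head);
}

and prep_compound_head() writes the compound metadata (order, mapcount, etc.)
into the first two tail pages, which is why it may only run once those two
tails have been through __init_zone_device_page(), i.e. at i == 2.)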
--->8-----
Whereas your original suggestion would look more like this:
[...]
static void __ref memmap_init_compound(unsigned long zone_idx, int nid,
				       struct dev_pagemap *pgmap,
				       unsigned long start_pfn,
				       unsigned long end_pfn)
{
	unsigned long pfns_per_compound = pgmap_pfn_geometry(pgmap);
	unsigned int order_align = order_base_2(pfns_per_compound);
	unsigned long pfn, i;

	for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) {
		struct page *page = pfn_to_page(pfn);

		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
		__SetPageHead(page);

		for (i = 1; i < pfns_per_compound; i++) {
			__init_zone_device_page(page + i, pfn + i, zone_idx,
						nid, pgmap);
			prep_compound_tail(page, i);

			/*
			 * The first and second tail pages need to be
			 * initialized first, hence the head page is
			 * prepared last.
			 */
			if (i == 2)
				prep_compound_head(page, order_align);
		}
	}
}

static void __ref memmap_init_base(unsigned long zone_idx, int nid,
				   struct dev_pagemap *pgmap,
				   unsigned long start_pfn,
				   unsigned long end_pfn)
{
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		struct page *page = pfn_to_page(pfn);

		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
	}
}

void __ref memmap_init_zone_device(struct zone *zone,
				   unsigned long start_pfn,
				   unsigned long nr_pages,
				   struct dev_pagemap *pgmap)
{
	unsigned long end_pfn = start_pfn + nr_pages;
	struct pglist_data *pgdat = zone->zone_pgdat;
	struct vmem_altmap *altmap = pgmap_altmap(pgmap);
	bool compound = pgmap_geometry(pgmap) > PAGE_SIZE;
	unsigned long zone_idx = zone_idx(zone);
	unsigned long start = jiffies;
	int nid = pgdat->node_id;

	if (WARN_ON_ONCE(!pgmap || zone_idx != ZONE_DEVICE))
		return;

	/*
	 * The call to memmap_init_zone should have already taken care
	 * of the pages reserved for the memmap, so we can just jump to
	 * the end of that region and start processing the device pages.
	 */
	if (altmap) {
		start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
		nr_pages = end_pfn - start_pfn;
	}

	if (compound)
		memmap_init_compound(zone_idx, nid, pgmap, start_pfn, end_pfn);
	else
		memmap_init_base(zone_idx, nid, pgmap, start_pfn, end_pfn);

	pr_info("%s initialised %lu pages in %ums\n", __func__,
		nr_pages, jiffies_to_msecs(jiffies - start));
}
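
(For completeness, the device-dax enabling mentioned above then boils down to
setting the geometry to the device alignment; sketch only, the exact field and
condition are per the next patch:

	/* Use compound pages iff a huge mapping alignment was requested. */
	if (dev_dax->align > PAGE_SIZE)
		pgmap->geometry = dev_dax->align;

i.e. @geometry matches @align, and memmap_init_zone_device() above picks it up
via pgmap_pfn_geometry().)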