On Wed, Feb 08, 2017 at 07:57:27PM -0800, Matthew Wilcox wrote:
> On Thu, Jan 26, 2017 at 02:57:43PM +0300, Kirill A. Shutemov wrote:
> > +++ b/include/linux/pagemap.h
> > @@ -332,6 +332,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
> >                     mapping_gfp_mask(mapping));
> >  }
> >  
> > +static inline struct page *find_subpage(struct page *page, pgoff_t offset)
> > +{
> > +   VM_BUG_ON_PAGE(PageTail(page), page);
> > +   VM_BUG_ON_PAGE(page->index > offset, page);
> > +   VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) < offset,
> > +                   page);
> > +   return page - page->index + offset;
> > +}
> 
> What would you think to:
> 
> static inline void check_page_index(struct page *page, pgoff_t offset)
> {
>       VM_BUG_ON_PAGE(PageTail(page), page);
>       VM_BUG_ON_PAGE(page->index > offset, page);
>       VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) <= offset,
>                       page);
> }
> 
> (I think I fixed an off-by-one error up there ...  if
> index + (1 << order) == offset, this is also a bug, right?
> because offset would then refer to the next page, not this page)

Right, thanks.
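
To double-check with a concrete case: for an order-2 page at
page->index == 4 the valid offsets are 4..7, and

	page->index + (1 << 2) == 8

is already the first offset of the *next* page, so offset == 8 has to
trip the VM_BUG_ON -- hence "<=" rather than "<".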

> 
> static inline struct page *find_subpage(struct page *page, pgoff_t offset)
> {
>       check_page_index(page, offset);
>       return page + (offset - page->index);
> }
> 
> ... then you can use check_page_index down ...

Okay, makes sense.

> 
> > @@ -1250,7 +1233,6 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
> >                     put_page(page);
> >                     goto repeat;
> >             }
> > -           VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
> 
> ... here?

Ok.
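
So the removed VM_BUG_ON() would, give or take the surrounding context,
become:

		check_page_index(page, offset);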

> > @@ -1472,25 +1451,35 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
> >                     goto repeat;
> >             }
> >  
> > +           /* For multi-order entries, find relevant subpage */
> > +           if (PageTransHuge(page)) {
> > +                   VM_BUG_ON(index - page->index < 0);
> > +                   VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> > +                   page += index - page->index;
> > +           }
> 
> Use find_subpage() here?

Ok.
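
With find_subpage() the open-coded block collapses to something like:

		/* For multi-order entries, find relevant subpage */
		page = find_subpage(page, index);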

> >             pages[ret] = page;
> >             if (++ret == nr_pages)
> >                     break;
> > +           if (!PageTransCompound(page))
> > +                   continue;
> > +           for (refs = 0; ret < nr_pages &&
> > +                           (index + 1) % HPAGE_PMD_NR;
> > +                           ret++, refs++, index++)
> > +                   pages[ret] = ++page;
> > +           if (refs)
> > +                   page_ref_add(compound_head(page), refs);
> > +           if (ret == nr_pages)
> > +                   break;
> 
> Can we avoid referencing huge pages specifically in the page cache?  I'd
> like us to get to the point where we can put arbitrary compound pages into
> the page cache.  For example, I think this can be written as:
> 
>               if (!PageCompound(page))
>                       continue;
>               for (refs = 0; ret < nr_pages; refs++, index++) {
>                       if (index > page->index + (1 << compound_order(page)))
>                               break;
>                       pages[ret++] = ++page;
>               }
>               if (refs)
>                       page_ref_add(compound_head(page), refs);
>               if (ret == nr_pages)
>                       break;

That's slightly more costly, but I guess that's fine.
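
One adjustment, though: once we start walking tail pages, ->index and
compound_order() have to be read from the head page, and the bound needs
to stop us *before* we step past the last subpage. With a local
struct page *head, something like this (untested):

		if (!PageCompound(page))
			continue;
		head = compound_head(page);
		for (refs = 0; ret < nr_pages; refs++, index++) {
			/* Don't step past the last subpage */
			if (index + 1 >= head->index +
					(1 << compound_order(head)))
				break;
			pages[ret++] = ++page;
		}
		if (refs)
			page_ref_add(head, refs);
		if (ret == nr_pages)
			break;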

> > @@ -1541,19 +1533,12 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
> >  
> > +           /* For multi-order entries, find relevant subpage */
> > +           if (PageTransHuge(page)) {
> > +                   VM_BUG_ON(index - page->index < 0);
> > +                   VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> > +                   page += index - page->index;
> > +           }
> > +
> >             pages[ret] = page;
> >             if (++ret == nr_pages)
> >                     break;
> > +           if (!PageTransCompound(page))
> > +                   continue;
> > +           for (refs = 0; ret < nr_pages &&
> > +                           (index + 1) % HPAGE_PMD_NR;
> > +                           ret++, refs++, index++)
> > +                   pages[ret] = ++page;
> > +           if (refs)
> > +                   page_ref_add(compound_head(page), refs);
> > +           if (ret == nr_pages)
> > +                   break;
> >     }
> >     rcu_read_unlock();
> >     return ret;
> 
> Ugh, the same code again.  Hmm ... we only need to modify 'ret' as a result
> of this ... so could we split it out like this?
> 
> static unsigned populate_pages(struct page **pages, unsigned i, unsigned max,
>                                 struct page *page)
> {
>         unsigned refs = 0;
>         for (;;) {
>                 pages[i++] = page;
>                 if (i == max)
>                         break;
>                 if (PageHead(page + 1))
>                         break;

Hm? PageHead()? No. The next page can be a head page or a small page.

I *guess* we can get away with !PageTail(page + 1)... (adjusted sketch
below, after your version).

>                 page++;
>                 refs++;
>         }
>         if (refs)
>                 page_ref_add(compound_head(page), refs);
>         return i;
> }
> 
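
So, with !PageTail(page + 1), something along these lines (untested):

static unsigned populate_pages(struct page **pages, unsigned i, unsigned max,
				struct page *page)
{
	unsigned refs = 0;

	for (;;) {
		pages[i++] = page;
		if (i == max)
			break;
		/* The next page is a head page or a small page: stop */
		if (!PageTail(page + 1))
			break;
		page++;
		refs++;
	}
	if (refs)
		page_ref_add(compound_head(page), refs);
	return i;
}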
> > +unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *indexp,
> >                     int tag, unsigned int nr_pages, struct page **pages)
> 
> >                     break;
> > +           if (!PageTransCompound(page))
> > +                   continue;
> > +           for (refs = 0; ret < nr_pages &&
> > +                           (index + 1) % HPAGE_PMD_NR;
> > +                           ret++, refs++, index++)
> > +                   pages[ret] = ++page;
> > +           if (refs)
> > +                   page_ref_add(compound_head(page), refs);
> > +           if (ret == nr_pages)
> > +                   break;
> >     }
> 
> ... and again!
> 
> > @@ -2326,25 +2337,26 @@ void filemap_map_pages(struct vm_fault *vmf,
> > +           /* For multi-order entries, find relevant subpage */
> > +           if (PageTransHuge(page)) {
> > +                   VM_BUG_ON(index - page->index < 0);
> > +                   VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> > +                   page += index - page->index;
> > +           }
> > +
> >             if (!PageUptodate(page) ||
> >                             PageReadahead(page) ||
> 
> Readahead is PF_NO_COMPOUND ... so I don't see why this works?

We wouldn't see pages with the readahead flag set or cleared until huge
page support in ext4 is enabled, at the very end of the patchset.

> 
> > @@ -2378,8 +2390,14 @@ void filemap_map_pages(struct vm_fault *vmf,
> >             /* Huge page is mapped? No need to proceed. */
> >             if (pmd_trans_huge(*vmf->pmd))
> >                     break;
> > -           if (iter.index == end_pgoff)
> > +           if (index == end_pgoff)
> >                     break;
> > +           if (page && PageTransCompound(page) &&
> > +                           (index & (HPAGE_PMD_NR - 1)) !=
> > +                           HPAGE_PMD_NR - 1) {
> > +                   index++;
> > +                   goto repeat;
> 
> Do we really have to go all the way back to the beginning of the loop?  It'd
> be nice to be able to insert all of the relevant PTEs in a shorter loop here.
> We'd need to bump the reference count N more times, and I think we do need
> to check HWPoison for each subpage.

I'll look into it.

Thanks for the review.

-- 
 Kirill A. Shutemov
