On Fri, Jun 19, 2020 at 05:59:20AM -0700, Christoph Hellwig wrote:
> After looking at v2 yesterday I noticed I few things in the structure
> that I really didn't like:
> 
>  - using a struct page * return value just to communicate status codes
>  - the extremely long function names
>  - a generally somewhat non-intuitive split of the helpers
> 
> I then hacked on top of it for a bit while sitting in a telephone
> conference.  Below is my result, which passes a quick xfstests run.
> Note that this includes the refactoring and the batch lookup changes
> as I did it on top of your series:

I like it - I can't get your patch to apply to anything though, do you have it
up in a git repo anywhere?

> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index f0ae9a6308cb4d..9e0aecd99950f4 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1972,273 +1972,360 @@ static void shrink_readahead_size_eio(struct 
> file_ra_state *ra)
>       ra->ra_pages /= 4;
>  }
>  
> -/**
> - * generic_file_buffered_read - generic file read routine
> - * @iocb:    the iocb to read
> - * @iter:    data destination
> - * @written: already copied
> - *
> - * This is a generic file read routine, and uses the
> - * mapping->a_ops->readpage() function for the actual low-level stuff.
> - *
> - * This is really ugly. But the goto's actually try to clarify some
> - * of the logic when it comes to error handling etc.
> - *
> - * Return:
> - * * total number of bytes copied, including those the were already @written
> - * * negative error code if nothing was copied
> - */
> -ssize_t generic_file_buffered_read(struct kiocb *iocb,
> -             struct iov_iter *iter, ssize_t written)
> +static inline pgoff_t filemap_last_index(struct kiocb *iocb,
> +             struct iov_iter *iter)
>  {
> -     struct file *filp = iocb->ki_filp;
> -     struct address_space *mapping = filp->f_mapping;
> -     struct inode *inode = mapping->host;
> -     struct file_ra_state *ra = &filp->f_ra;
> -     loff_t *ppos = &iocb->ki_pos;
> -     pgoff_t index;
> -     pgoff_t last_index;
> -     pgoff_t prev_index;
> -     unsigned long offset;      /* offset into pagecache page */
> -     unsigned int prev_offset;
> -     int error = 0;
> -
> -     if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
> -             return 0;
> -     iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
> -
> -     index = *ppos >> PAGE_SHIFT;
> -     prev_index = ra->prev_pos >> PAGE_SHIFT;
> -     prev_offset = ra->prev_pos & (PAGE_SIZE-1);
> -     last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
> -     offset = *ppos & ~PAGE_MASK;
> +     return (iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT;
> +}
>  
> -     for (;;) {
> -             struct page *page;
> -             pgoff_t end_index;
> -             loff_t isize;
> -             unsigned long nr, ret;
> +static inline unsigned long filemap_nr_pages(struct kiocb *iocb,
> +             struct iov_iter *iter)
> +{
> +     return filemap_last_index(iocb, iter) - (iocb->ki_pos >> PAGE_SHIFT);
> +}
>  
> -             cond_resched();
> -find_page:
> -             if (fatal_signal_pending(current)) {
> -                     error = -EINTR;
> -                     goto out;
> -             }
> +static int __filemap_read_not_uptodate(struct file *file, struct page *page)
> +{
> +     int error;
>  
> -             page = find_get_page(mapping, index);
> -             if (!page) {
> -                     if (iocb->ki_flags & IOCB_NOWAIT)
> -                             goto would_block;
> -                     page_cache_sync_readahead(mapping,
> -                                     ra, filp,
> -                                     index, last_index - index);
> -                     page = find_get_page(mapping, index);
> -                     if (unlikely(page == NULL))
> -                             goto no_cached_page;
> -             }
> -             if (PageReadahead(page)) {
> -                     page_cache_async_readahead(mapping,
> -                                     ra, filp, page,
> -                                     index, last_index - index);
> -             }
> -             if (!PageUptodate(page)) {
> -                     if (iocb->ki_flags & IOCB_NOWAIT) {
> -                             put_page(page);
> -                             goto would_block;
> -                     }
> +     error = lock_page_killable(page);
> +     if (error)
> +             return error;
>  
> +     if (!PageUptodate(page)) {
> +             if (!page->mapping) {
>                       /*
> -                      * See comment in do_read_cache_page on why
> -                      * wait_on_page_locked is used to avoid unnecessarily
> -                      * serialisations and why it's safe.
> +                      * invalidate_mapping_pages got it
>                        */
> -                     error = wait_on_page_locked_killable(page);
> -                     if (unlikely(error))
> -                             goto readpage_error;
> -                     if (PageUptodate(page))
> -                             goto page_ok;
> -
> -                     if (inode->i_blkbits == PAGE_SHIFT ||
> -                                     !mapping->a_ops->is_partially_uptodate)
> -                             goto page_not_up_to_date;
> -                     /* pipes can't handle partially uptodate pages */
> -                     if (unlikely(iov_iter_is_pipe(iter)))
> -                             goto page_not_up_to_date;
> -                     if (!trylock_page(page))
> -                             goto page_not_up_to_date;
> -                     /* Did it get truncated before we got the lock? */
> -                     if (!page->mapping)
> -                             goto page_not_up_to_date_locked;
> -                     if (!mapping->a_ops->is_partially_uptodate(page,
> -                                                     offset, iter->count))
> -                             goto page_not_up_to_date_locked;
> -                     unlock_page(page);
> +                     error = AOP_TRUNCATED_PAGE;
> +             } else {
> +                     error = -EIO;
>               }
> -page_ok:
> -             /*
> -              * i_size must be checked after we know the page is Uptodate.
> -              *
> -              * Checking i_size after the check allows us to calculate
> -              * the correct value for "nr", which means the zero-filled
> -              * part of the page is not copied back to userspace (unless
> -              * another truncate extends the file - this is desired though).
> -              */
> +     }
>  
> -             isize = i_size_read(inode);
> -             end_index = (isize - 1) >> PAGE_SHIFT;
> -             if (unlikely(!isize || index > end_index)) {
> -                     put_page(page);
> -                     goto out;
> -             }
> +     unlock_page(page);
> +     if (error == -EIO)
> +             shrink_readahead_size_eio(&file->f_ra);
> +     return error;
> +}
>  
> -             /* nr is the maximum number of bytes to copy from this page */
> -             nr = PAGE_SIZE;
> -             if (index == end_index) {
> -                     nr = ((isize - 1) & ~PAGE_MASK) + 1;
> -                     if (nr <= offset) {
> -                             put_page(page);
> -                             goto out;
> -                     }
> -             }
> -             nr = nr - offset;
> +static int __filemap_read_one_page(struct file *file, struct page *page)
> +{
> +     int error;
>  
> -             /* If users can be writing to this page using arbitrary
> -              * virtual addresses, take care about potential aliasing
> -              * before reading the page on the kernel side.
> -              */
> -             if (mapping_writably_mapped(mapping))
> -                     flush_dcache_page(page);
> +     /*
> +      * A previous I/O error may have been due to temporary failures, e.g.
> +      * multipath errors. PG_error will be set again if readpage fails.
> +      */
> +     ClearPageError(page);
>  
> -             /*
> -              * When a sequential read accesses a page several times,
> -              * only mark it as accessed the first time.
> -              */
> -             if (prev_index != index || offset != prev_offset)
> -                     mark_page_accessed(page);
> -             prev_index = index;
> +     /*
> +      * Start the actual read. The read will unlock the page.
> +      */
> +     error = file->f_mapping->a_ops->readpage(file, page);
> +     if (!error && !PageUptodate(page))
> +             return __filemap_read_not_uptodate(file, page);
> +     return error;
> +}
>  
> -             /*
> -              * Ok, we have the page, and it's up-to-date, so
> -              * now we can copy it to user space...
> -              */
> +static int filemap_read_one_page(struct kiocb *iocb, struct iov_iter *iter,
> +             struct page *page, pgoff_t index)
> +{
> +     struct file *file = iocb->ki_filp;
> +     struct address_space *mapping = file->f_mapping;
> +     loff_t pos = max(iocb->ki_pos, (loff_t)index << PAGE_SHIFT);
> +     loff_t count = iocb->ki_pos + iter->count - pos;
> +     int error;
>  
> -             ret = copy_page_to_iter(page, offset, nr, iter);
> -             offset += ret;
> -             index += offset >> PAGE_SHIFT;
> -             offset &= ~PAGE_MASK;
> -             prev_offset = offset;
> +     /*
> +      * See comment in do_read_cache_page on why wait_on_page_locked is used
> +      * to avoid unnecessarily serialisations and why it's safe.
> +      */
> +     error = wait_on_page_locked_killable(page);
> +     if (unlikely(error))
> +             goto out_put_page;
>  
> -             put_page(page);
> -             written += ret;
> -             if (!iov_iter_count(iter))
> -                     goto out;
> -             if (ret < nr) {
> -                     error = -EFAULT;
> -                     goto out;
> -             }
> -             continue;
> +     if (PageUptodate(page))
> +             return 0;
> +
> +     if (mapping->host->i_blkbits == PAGE_SHIFT ||
> +         !mapping->a_ops->is_partially_uptodate)
> +             goto page_not_up_to_date;
> +     /* pipes can't handle partially uptodate pages */
> +     if (unlikely(iov_iter_is_pipe(iter)))
> +             goto page_not_up_to_date;
> +     if (!trylock_page(page))
> +             goto page_not_up_to_date;
> +     /* Did it get truncated before we got the lock? */
> +     if (!page->mapping)
> +             goto page_not_up_to_date_locked;
> +     if (!mapping->a_ops->is_partially_uptodate(page, pos & ~PAGE_MASK,
> +                     count))
> +             goto page_not_up_to_date_locked;
> +done:
> +     unlock_page(page);
> +     return 0;
>  
>  page_not_up_to_date:
> -             /* Get exclusive access to the page ... */
> -             error = lock_page_killable(page);
> -             if (unlikely(error))
> -                     goto readpage_error;
> +     /* Get exclusive access to the page ... */
> +     error = lock_page_killable(page);
> +     if (unlikely(error))
> +             goto out_put_page;
>  
>  page_not_up_to_date_locked:
> -             /* Did it get truncated before we got the lock? */
> -             if (!page->mapping) {
> -                     unlock_page(page);
> -                     put_page(page);
> +     /* Did it get truncated before we got the lock? */
> +     if (!page->mapping) {
> +             error = AOP_TRUNCATED_PAGE;
> +             unlock_page(page);
> +             goto out_put_page;
> +     }
> +
> +     /* Did somebody else fill it already? */
> +     if (PageUptodate(page))
> +             goto done;
> +
> +     error = __filemap_read_one_page(file, page);
> +     if (error)
> +             goto out_put_page;
> +     return 0;
> +
> +out_put_page:
> +     put_page(page);
> +     return error;
> +}
> +
> +static int filemap_read_get_pages(struct kiocb *iocb, struct iov_iter *iter,
> +             struct page **pages, unsigned long max_pages)
> +{
> +     struct file *file = iocb->ki_filp;
> +     struct address_space *mapping = file->f_mapping;
> +     unsigned long nr = min(filemap_nr_pages(iocb, iter), max_pages);
> +     pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
> +     int ret;
> +
> +     if (fatal_signal_pending(current))
> +             return -EINTR;
> +
> +     ret = find_get_pages_contig(mapping, index, nr, pages);
> +     if (ret)
> +             return ret;
> +
> +     if (iocb->ki_flags & IOCB_NOWAIT)
> +             return -EAGAIN;
> +
> +     page_cache_sync_readahead(mapping, &file->f_ra, file, index,
> +                               filemap_nr_pages(iocb, iter));
> +
> +     ret = find_get_pages_contig(mapping, index, nr, pages);
> +     if (ret)
> +             return ret;
> +
> +     /*
> +      * Ok, it wasn't cached, so we need to create a new page..
> +      */
> +     pages[0] = page_cache_alloc(mapping);
> +     if (!pages[0])
> +             return -ENOMEM;
> +
> +     ret = add_to_page_cache_lru(pages[0], mapping, index,
> +                     mapping_gfp_constraint(mapping, GFP_KERNEL));
> +     if (ret) {
> +             if (ret == -EEXIST)
> +                     ret = 0;
> +             goto out_put_page;
> +     }
> +
> +     ret = __filemap_read_one_page(file, pages[0]);
> +     if (ret) {
> +             if (ret == AOP_TRUNCATED_PAGE)
> +                     ret = 0;
> +             goto out_put_page;
> +     }
> +
> +     return 1;
> +
> +out_put_page:
> +     put_page(pages[0]);
> +     return ret;
> +}
> +
> +static int filemap_read_pages(struct kiocb *iocb, struct iov_iter *iter,
> +             struct page **pages, unsigned int nr_pages)
> +{
> +     struct file *file = iocb->ki_filp;
> +     struct address_space *mapping = file->f_mapping;
> +     pgoff_t last_index = filemap_last_index(iocb, iter);
> +     pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
> +     unsigned int cur_page, j;
> +     int err = 0;
> +
> +     for (cur_page = 0; cur_page < nr_pages; cur_page++, index++) {
> +             struct page *page = pages[cur_page];
> +
> +             if (PageReadahead(page))
> +                     page_cache_async_readahead(mapping, &file->f_ra, file,
> +                                     page, index, last_index - index);
> +
> +             if (PageUptodate(page))
>                       continue;
> -             }
>  
> -             /* Did somebody else fill it already? */
> -             if (PageUptodate(page)) {
> -                     unlock_page(page);
> -                     goto page_ok;
> +             if (iocb->ki_flags & IOCB_NOWAIT) {
> +                     put_page(page);
> +                     err = -EAGAIN;
> +                     goto out_put_pages;
>               }
>  
> -readpage:
> -             /*
> -              * A previous I/O error may have been due to temporary
> -              * failures, eg. multipath errors.
> -              * PG_error will be set again if readpage fails.
> -              */
> -             ClearPageError(page);
> -             /* Start the actual read. The read will unlock the page. */
> -             error = mapping->a_ops->readpage(filp, page);
> +             err = filemap_read_one_page(iocb, iter, page, index);
> +             if (err)
> +                     goto out_put_pages;
> +     }
>  
> -             if (unlikely(error)) {
> -                     if (error == AOP_TRUNCATED_PAGE) {
> -                             put_page(page);
> -                             error = 0;
> -                             goto find_page;
> -                     }
> -                     goto readpage_error;
> -             }
> +     return cur_page;
>  
> -             if (!PageUptodate(page)) {
> -                     error = lock_page_killable(page);
> -                     if (unlikely(error))
> -                             goto readpage_error;
> -                     if (!PageUptodate(page)) {
> -                             if (page->mapping == NULL) {
> -                                     /*
> -                                      * invalidate_mapping_pages got it
> -                                      */
> -                                     unlock_page(page);
> -                                     put_page(page);
> -                                     goto find_page;
> -                             }
> -                             unlock_page(page);
> -                             shrink_readahead_size_eio(ra);
> -                             error = -EIO;
> -                             goto readpage_error;
> -                     }
> -                     unlock_page(page);
> -             }
> +out_put_pages:
> +     for (j = cur_page + 1; j < nr_pages; j++)
> +             put_page(pages[j]);
> +     if (cur_page == 0) {
> +             if (err == AOP_TRUNCATED_PAGE)
> +                     err = 0;
> +             return err;
> +     }
> +     return cur_page;
> +}
>  
> -             goto page_ok;
> +static int filemap_do_read(struct kiocb *iocb, struct iov_iter *iter,
> +             struct page **pages, unsigned long max_pages)
> +{
> +     struct file *filp = iocb->ki_filp;
> +     struct address_space *mapping = filp->f_mapping;
> +     struct file_ra_state *ra = &filp->f_ra;
> +     loff_t isize, end_offset;
> +     int nr_pages, i;
> +     bool writably_mapped;
>  
> -readpage_error:
> -             /* UHHUH! A synchronous read error occurred. Report it */
> -             put_page(page);
> -             goto out;
> +     cond_resched();
> +
> +was_truncated:
> +     nr_pages = filemap_read_get_pages(iocb, iter, pages, max_pages);
> +     if (nr_pages > 0)
> +             nr_pages = filemap_read_pages(iocb, iter, pages, nr_pages);
> +     if (nr_pages == 0)
> +             goto was_truncated;
> +     if (nr_pages < 0)
> +             return nr_pages;
> +
> +     /*
> +      * i_size must be checked after we know the pages are Uptodate.
> +      *
> +      * Checking i_size after the check allows us to calculate the correct
> +      * value for "nr_pages", which means the zero-filled part of the page is
> +      * not copied back to userspace (unless another truncate extends the
> +      * file - this is desired though).
> +      */
> +     isize = i_size_read(mapping->host);
> +     if (unlikely(iocb->ki_pos >= isize))
> +             goto put_pages;
> +
> +     end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
> +
> +     while ((iocb->ki_pos >> PAGE_SHIFT) + nr_pages >
> +            (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
> +             put_page(pages[--nr_pages]);
> +
> +     /*
> +      * Once we start copying data, we don't want to be touching any
> +      * cachelines that might be contended:
> +      */
> +     writably_mapped = mapping_writably_mapped(mapping);
> +
> +     /*
> +      * When a sequential read accesses a page several times, only mark it as
> +      * accessed the first time.
> +      */
> +     if (iocb->ki_pos >> PAGE_SHIFT != ra->prev_pos >> PAGE_SHIFT)
> +             mark_page_accessed(pages[0]);
> +     for (i = 1; i < nr_pages; i++)
> +             mark_page_accessed(pages[i]);
> +
> +     for (i = 0; i < nr_pages; i++) {
> +             unsigned offset = iocb->ki_pos & ~PAGE_MASK;
> +             unsigned bytes = min_t(loff_t, end_offset - iocb->ki_pos,
> +                                            PAGE_SIZE - offset);
> +             unsigned copied;
>  
> -no_cached_page:
>               /*
> -              * Ok, it wasn't cached, so we need to create a new
> -              * page..
> +              * If users can be writing to this page using arbitrary virtual
> +              * addresses, take care about potential aliasing before reading
> +              * the page on the kernel side.
>                */
> -             page = page_cache_alloc(mapping);
> -             if (!page) {
> -                     error = -ENOMEM;
> -                     goto out;
> -             }
> -             error = add_to_page_cache_lru(page, mapping, index,
> -                             mapping_gfp_constraint(mapping, GFP_KERNEL));
> -             if (error) {
> -                     put_page(page);
> -                     if (error == -EEXIST) {
> -                             error = 0;
> -                             goto find_page;
> -                     }
> -                     goto out;
> -             }
> -             goto readpage;
> +             if (writably_mapped)
> +                     flush_dcache_page(pages[i]);
> +
> +             copied = copy_page_to_iter(pages[i], offset, bytes, iter);
> +
> +             iocb->ki_pos += copied;
> +             ra->prev_pos = iocb->ki_pos;
> +
> +             if (copied < bytes)
> +                     return -EFAULT;
>       }
>  
> -would_block:
> -     error = -EAGAIN;
> -out:
> -     ra->prev_pos = prev_index;
> -     ra->prev_pos <<= PAGE_SHIFT;
> -     ra->prev_pos |= prev_offset;
> +put_pages:
> +     for (i = 0; i < nr_pages; i++)
> +             put_page(pages[i]);
> +     return 0;
> +}
> +
> +/**
> + * generic_file_buffered_read - generic file read routine
> + * @iocb:    the iocb to read
> + * @iter:    data destination
> + * @written: already copied
> + *
> + * This is a generic file read routine, and uses the
> + * mapping->a_ops->readpage() function for the actual low-level stuff.
> + *
> + * Return:
> + * * total number of bytes copied, including those the were already @written
> + * * negative error code if nothing was copied
> + */
> +ssize_t generic_file_buffered_read(struct kiocb *iocb,
> +             struct iov_iter *iter, ssize_t written)
> +{
> +     struct address_space *mapping = iocb->ki_filp->f_mapping;
> +     size_t orig_count = iov_iter_count(iter);
> +     struct inode *inode = mapping->host;
> +     struct page *page_array[8], **pages = NULL;
> +     unsigned max_pages = filemap_nr_pages(iocb, iter);
> +     int error;
>  
> -     *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
> -     file_accessed(filp);
> -     return written ? written : error;
> +     if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
> +             return 0;
> +     iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
> +
> +     if (max_pages > ARRAY_SIZE(page_array))
> +             pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
> +
> +     if (!pages) {
> +             pages = page_array;
> +             max_pages = ARRAY_SIZE(page_array);
> +     }
> +
> +     do {
> +             error = filemap_do_read(iocb, iter, pages, max_pages);
> +             if (error)
> +                     break;
> +     } while (iov_iter_count(iter) && iocb->ki_pos < i_size_read(inode));
> +
> +     file_accessed(iocb->ki_filp);
> +     written += orig_count - iov_iter_count(iter);
> +
> +     if (pages != page_array)
> +             kfree(pages);
> +
> +     if (unlikely(!written))
> +             return error;
> +     return written;
>  }
>  EXPORT_SYMBOL_GPL(generic_file_buffered_read);
>  

Reply via email to