Each time it is called, afs_extract_data() sets up a temporary iov_iter
describing the remaining buffer to be filled and passes it to AF_RXRPC.

Instead:

 (1) Put an iterator in the afs_call struct.

 (2) Set the iterator for each marshalling stage to load data into the
     appropriate places.  A number of convenience functions are provided to
     this end (e.g. afs_extract_to_buf()).

     This iterator is then passed to afs_extract_data().

 (3) Use the new ITER_MAPPING iterator when reading data to load directly
     into the inode's pages without needing to create a list of them.  This
     comes with a page-done callback that can be used to unlock pages as
     they are filled.

 (4) Use the new ITER_DISCARD iterator to discard any excess data provided
     by FetchData.

This will allow O_DIRECT calls to be supported in future patches.

Signed-off-by: David Howells <dhowe...@redhat.com>
---

 fs/afs/cmservice.c         |   40 +++----
 fs/afs/dir.c               |  191 +++++++++++++++++++++++----------
 fs/afs/file.c              |  199 +++++++++++++++++++++++------------
 fs/afs/fsclient.c          |  252 ++++++++++++--------------------------------
 fs/afs/internal.h          |   52 ++++++++-
 fs/afs/rxrpc.c             |   41 ++-----
 fs/afs/vlclient.c          |  104 ++++++++----------
 fs/afs/write.c             |    8 +
 include/linux/fscache.h    |   31 +++++
 include/trace/events/afs.h |   22 ++--
 10 files changed, 499 insertions(+), 441 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 58f79301a716..4db62ae8dc1a 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -176,13 +176,13 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* extract the FID array and its count in two steps */
        case 1:
                _debug("extract FID count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -196,13 +196,12 @@ static int afs_deliver_cb_callback(struct afs_call *call)
                                       GFP_KERNEL);
                if (!call->buffer)
                        return -ENOMEM;
-               call->offset = 0;
+               afs_extract_to_buf(call, call->count * 3 * 4);
                call->unmarshall++;
 
        case 2:
                _debug("extract FID array");
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count * 3 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -222,13 +221,13 @@ static int afs_deliver_cb_callback(struct afs_call *call)
                        cb->cb.type     = AFSCM_CB_UNTYPED;
                }
 
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* extract the callback array and its count in two steps */
        case 3:
                _debug("extract CB count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -237,13 +236,12 @@ static int afs_deliver_cb_callback(struct afs_call *call)
                if (call->count2 != call->count && call->count2 != 0)
                        return afs_protocol_error(call, -EBADMSG,
                                                  afs_eproto_cb_count);
-               call->offset = 0;
+               afs_extract_to_buf(call, call->count2 * 3 * 4);
                call->unmarshall++;
 
        case 4:
                _debug("extract CB array");
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count2 * 3 * 4, false);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
@@ -256,7 +254,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
                        cb->cb.type     = ntohl(*bp++);
                }
 
-               call->offset = 0;
                call->unmarshall++;
        case 5:
                break;
@@ -303,7 +300,8 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 
        rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
 
-       ret = afs_extract_data(call, NULL, 0, false);
+       afs_extract_discard(call, 0);
+       ret = afs_extract_data(call, false);
        if (ret < 0)
                return ret;
 
@@ -332,16 +330,15 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
                call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
                if (!call->buffer)
                        return -ENOMEM;
+               afs_extract_to_buf(call, 11 * sizeof(__be32));
                call->unmarshall++;
 
        case 1:
                _debug("extract UUID");
-               ret = afs_extract_data(call, call->buffer,
-                                      11 * sizeof(__be32), false);
+               ret = afs_extract_data(call, false);
                switch (ret) {
                case 0:         break;
                case -EAGAIN:   return 0;
@@ -364,7 +361,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
                for (loop = 0; loop < 6; loop++)
                        r->node[loop] = ntohl(b[loop + 5]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 2:
@@ -407,7 +403,8 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 
        _enter("");
 
-       ret = afs_extract_data(call, NULL, 0, false);
+       afs_extract_discard(call, 0);
+       ret = afs_extract_data(call, false);
        if (ret < 0)
                return ret;
 
@@ -455,16 +452,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
                call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
                if (!call->buffer)
                        return -ENOMEM;
+               afs_extract_to_buf(call, 11 * sizeof(__be32));
                call->unmarshall++;
 
        case 1:
                _debug("extract UUID");
-               ret = afs_extract_data(call, call->buffer,
-                                      11 * sizeof(__be32), false);
+               ret = afs_extract_data(call, false);
                switch (ret) {
                case 0:         break;
                case -EAGAIN:   return 0;
@@ -487,7 +483,6 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
                for (loop = 0; loop < 6; loop++)
                        r->node[loop] = ntohl(b[loop + 5]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 2:
@@ -572,7 +567,8 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 
        _enter("");
 
-       ret = afs_extract_data(call, NULL, 0, false);
+       afs_extract_discard(call, 0);
+       ret = afs_extract_data(call, false);
        if (ret < 0)
                return ret;
 
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 855bf2b79fed..c36b54b7450b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -105,6 +105,40 @@ struct afs_lookup_cookie {
        struct afs_fid          fids[50];
 };
 
+/*
+ * Drop the refs that we're holding on the pages we were reading into.  We've
+ * got refs on the first nr_pages pages.
+ */
+static void afs_dir_read_cleanup(struct afs_read *req)
+{
+       struct radix_tree_iter iter;
+       struct address_space *mapping = req->iter.mapping;
+       struct page *page;
+       pgoff_t index = req->pos >> PAGE_SHIFT;
+       void __rcu **slot;
+
+       if (unlikely(!req->nr_pages))
+               return;
+
+       rcu_read_lock();
+       radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
+               page = radix_tree_deref_slot(slot);
+               if (unlikely(!page))
+                       continue;
+
+               BUG_ON(radix_tree_exception(page));
+               BUG_ON(PageCompound(page));
+               BUG_ON(page->mapping != req->iter.mapping);
+
+               put_page(page);
+               req->nr_pages--;
+               if (req->nr_pages == 0)
+                       break;
+       }
+
+       rcu_read_unlock();
+}
+
 /*
  * check that a directory page is valid
  */
@@ -130,7 +164,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
        qty /= sizeof(union afs_xdr_dir_block);
 
        /* check them */
-       dbuf = kmap(page);
+       dbuf = kmap_atomic(page);
        for (tmp = 0; tmp < qty; tmp++) {
                if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
                        printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
@@ -148,7 +182,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
                ((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
        }
 
-       kunmap(page);
+       kunmap_atomic(dbuf);
 
 checked:
        afs_stat_v(dvnode, n_read_dir);
@@ -158,6 +192,45 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
        return false;
 }
 
+/*
+ * Check all the pages in a directory.  All the pages are held pinned.
+ */
+static int afs_dir_check(struct afs_vnode *dvnode, unsigned int nr_pages,
+                        loff_t i_size)
+{
+       struct radix_tree_iter iter;
+       struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+       struct page *page;
+       void __rcu **slot;
+       int ret = 0;
+
+       if (unlikely(!nr_pages))
+               return 0;
+
+       rcu_read_lock();
+       radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, 0) {
+               page = radix_tree_deref_slot(slot);
+               if (unlikely(!page)) {
+                       pr_warn("kAFS: Missing page in dircheck\n");
+                       ret = -EIO;
+                       break;
+               }
+               if (page->index >= nr_pages)
+                       break;
+
+               BUG_ON(radix_tree_exception(page));
+               BUG_ON(PageCompound(page));
+               BUG_ON(page->mapping != mapping);
+
+               ret = afs_dir_check_page(dvnode, page, i_size);
+               if (ret < 0)
+                       break;
+       }
+
+       rcu_read_unlock();
+       return ret;
+}
+
 /*
  * open an AFS directory file
  */
@@ -184,56 +257,49 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 {
        struct afs_read *req;
        loff_t i_size;
-       int nr_pages, nr_inline, i, n;
-       int ret = -ENOMEM;
+       int nr_pages, i, n;
+       int ret;
+
+       _enter("");
+
+       req = kzalloc(sizeof(*req), GFP_KERNEL);
+       if (!req)
+               return ERR_PTR(-ENOMEM);
+
+       refcount_set(&req->usage, 1);
+       req->cleanup = afs_dir_read_cleanup;
 
-retry:
+expand:
        i_size = i_size_read(&dvnode->vfs_inode);
+       ret = -EIO;
        if (i_size < 2048)
-               return ERR_PTR(-EIO);
+               goto error;
+       ret = -EFBIG;
        if (i_size > 2048 * 1024)
-               return ERR_PTR(-EFBIG);
-
-       _enter("%llu", i_size);
+               goto error;
 
-       /* Get a request record to hold the page list.  We want to hold it
-        * inline if we can, but we don't want to make an order 1 allocation.
-        */
        nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
-       nr_inline = nr_pages;
-       if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
-               nr_inline = 0;
 
-       req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline,
-                     GFP_KERNEL);
-       if (!req)
-               return ERR_PTR(-ENOMEM);
-
-       refcount_set(&req->usage, 1);
-       req->nr_pages = nr_pages;
        req->actual_len = i_size; /* May change */
        req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
        req->data_version = dvnode->status.data_version; /* May change */
-       if (nr_inline > 0) {
-               req->pages = req->array;
-       } else {
-               req->pages = kcalloc(nr_pages, sizeof(struct page *),
-                                    GFP_KERNEL);
-               if (!req->pages)
-                       goto error;
-       }
+       iov_iter_mapping(&req->iter, READ, dvnode->vfs_inode.i_mapping,
+                        0, i_size);
 
-       /* Get a list of all the pages that hold or will hold the directory
-        * content.  We need to fill in any gaps that we might find where the
-        * memory reclaimer has been at work.  If there are any gaps, we will
+       /* Fill in any gaps that we might find where the memory reclaimer has
+        * been at work and pin all the pages.  If there are any gaps, we will
         * need to reread the entire directory contents.
         */
-       i = 0;
-       do {
+       i = req->nr_pages;
+       while (i < nr_pages) {
+               struct page *pages[8], *page;
+
                n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
-                                         req->nr_pages - i,
-                                         req->pages + i);
-               _debug("find %u at %u/%u", n, i, req->nr_pages);
+                                         min_t(unsigned int, nr_pages - i,
+                                               ARRAY_SIZE(pages)),
+                                         pages);
+               _debug("find %u at %u/%u", n, i, nr_pages);
+
                if (n == 0) {
                        gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
 
@@ -241,23 +307,25 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
                                afs_stat_v(dvnode, n_inval);
 
                        ret = -ENOMEM;
-                       req->pages[i] = __page_cache_alloc(gfp);
-                       if (!req->pages[i])
+                       page = __page_cache_alloc(gfp);
+                       if (!page)
                                goto error;
-                       ret = add_to_page_cache_lru(req->pages[i],
+                       ret = add_to_page_cache_lru(page,
                                                    dvnode->vfs_inode.i_mapping,
                                                    i, gfp);
                        if (ret < 0)
                                goto error;
 
-                       set_page_private(req->pages[i], 1);
-                       SetPagePrivate(req->pages[i]);
-                       unlock_page(req->pages[i]);
+                       set_page_private(page, 1);
+                       SetPagePrivate(page);
+                       unlock_page(page);
+                       req->nr_pages++;
                        i++;
                } else {
+                       req->nr_pages += n;
                        i += n;
                }
-       } while (i < req->nr_pages);
+       }
 
        /* If we're going to reload, we need to lock all the pages to prevent
         * races.
@@ -280,15 +348,18 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 
                task_io_account_read(PAGE_SIZE * req->nr_pages);
 
-               if (req->len < req->file_size)
-                       goto content_has_grown;
+               if (req->len < req->file_size) {
+                       /* The content has grown, so we need to expand the
+                        * buffer.
+                        */
+                       up_write(&dvnode->validate_lock);
+                       goto expand;
+               }
 
                /* Validate the data we just read. */
-               ret = -EIO;
-               for (i = 0; i < req->nr_pages; i++)
-                       if (!afs_dir_check_page(dvnode, req->pages[i],
-                                               req->actual_len))
-                               goto error_unlock;
+               ret = afs_dir_check(dvnode, req->nr_pages, req->actual_len);
+               if (ret < 0)
+                       goto error_unlock;
 
                // TODO: Trim excess pages
 
@@ -305,11 +376,6 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
        afs_put_read(req);
        _leave(" = %d", ret);
        return ERR_PTR(ret);
-
-content_has_grown:
-       up_write(&dvnode->validate_lock);
-       afs_put_read(req);
-       goto retry;
 }
 
 /*
@@ -415,6 +481,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
        struct afs_read *req;
        struct page *page;
        unsigned blkoff, limit;
+       void __rcu **slot;
        int ret;
 
        _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -438,9 +505,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
                blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
 
                /* Fetch the appropriate page from the directory and re-add it
-                * to the LRU.
+                * to the LRU.  We have all the pages pinned with an extra ref.
                 */
-               page = req->pages[blkoff / PAGE_SIZE];
+               rcu_read_lock();
+               page = NULL;
+               slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
+                                             blkoff / PAGE_SIZE);
+               if (slot)
+                       page = radix_tree_deref_slot(slot);
+               rcu_read_unlock();
                if (!page) {
                        ret = -EIO;
                        break;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7d4f26198573..e887a9b24f4f 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -148,7 +148,7 @@ int afs_open(struct inode *inode, struct file *file)
 
        if (file->f_flags & O_TRUNC)
                set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
-       
+
        file->private_data = af;
        _leave(" = 0");
        return 0;
@@ -185,24 +185,79 @@ int afs_release(struct inode *inode, struct file *file)
        return 0;
 }
 
+/*
+ * Make pages available as they're filled.  This function may not sleep.
+ */
+static void afs_readpages_page_done(const struct iov_iter *iter,
+                                   const struct bio_vec *bv)
+{
+       struct page *page = bv->bv_page;
+       struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+       struct afs_read *req = container_of(iter, struct afs_read, iter);
+
+       SetPageUptodate(page);
+
+       if (0 && afs_vnode_cache(vnode))
+               SetPageFsCache(page);
+       unlock_page(page);
+       put_page(page);
+       req->done_pages++;
+}
+
+/*
+ * Unlock the pages we were reading into.  We've got locks and refs on the
+ * first nr_pages pages.
+ */
+static void afs_file_read_cleanup(struct afs_read *req)
+{
+       struct radix_tree_iter iter;
+       struct address_space *mapping = req->iter.mapping;
+       struct page *page;
+       pgoff_t index = req->pos >> PAGE_SHIFT;
+       void **slot;
+
+       _enter("%lu,%u,%u,%zu",
+              index, req->done_pages, req->nr_pages, iov_iter_count(&req->iter));
+
+       if (likely(req->done_pages >= req->nr_pages))
+               return;
+
+       rcu_read_lock();
+       radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
+               page = radix_tree_deref_slot(slot);
+               if (unlikely(!page))
+                       continue;
+
+               BUG_ON(radix_tree_exception(page));
+               BUG_ON(PageCompound(page));
+               BUG_ON(page != *slot);
+               BUG_ON(page->mapping != req->iter.mapping);
+
+               if (req->error)
+                       SetPageError(page);
+               unlock_page(page);
+               put_page(page);
+               req->done_pages++;
+               if (req->done_pages >= req->nr_pages)
+                       break;
+       }
+
+       rcu_read_unlock();
+}
+
 /*
  * Dispose of a ref to a read record.
  */
 void afs_put_read(struct afs_read *req)
 {
-       int i;
-
        if (refcount_dec_and_test(&req->usage)) {
-               for (i = 0; i < req->nr_pages; i++)
-                       if (req->pages[i])
-                               put_page(req->pages[i]);
-               if (req->pages != req->array)
-                       kfree(req->pages);
+               if (req->cleanup)
+                       req->cleanup(req);
                kfree(req);
        }
 }
 
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
 /*
  * deal with notification that a page was read from the cache
  */
@@ -257,6 +312,22 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
        return ret;
 }
 
+/*
+ * Clear the trailer after a short read.
+ */
+static void afs_clear_after_read(struct afs_vnode *vnode, struct afs_read *req,
+                                bool catch_page_done)
+{
+       if (req->actual_len >= req->len)
+               return;
+       iov_iter_mapping(&req->iter, READ, vnode->vfs_inode.i_mapping,
+                        req->pos + req->actual_len,
+                        req->len - req->actual_len);
+       if (catch_page_done)
+               req->iter.page_done = afs_readpages_page_done;
+       iov_iter_zero(req->len - req->actual_len, &req->iter);
+}
+
 /*
  * read page from file, directory or symlink, given a key to use
  */
@@ -277,7 +348,7 @@ int afs_page_filler(void *data, struct page *page)
                goto error;
 
        /* is it cached? */
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
        ret = fscache_read_or_alloc_page(vnode->cache,
                                         page,
                                         afs_file_readpage_read_complete,
@@ -301,8 +372,7 @@ int afs_page_filler(void *data, struct page *page)
                _debug("cache said ENOBUFS");
        default:
        go_on:
-               req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
-                             GFP_KERNEL);
+               req = kzalloc(sizeof(struct afs_read), GFP_KERNEL);
                if (!req)
                        goto enomem;
 
@@ -314,10 +384,11 @@ int afs_page_filler(void *data, struct page *page)
                req->pos = (loff_t)page->index << PAGE_SHIFT;
                req->len = PAGE_SIZE;
                req->nr_pages = 1;
-               req->pages = req->array;
-               req->pages[0] = page;
                get_page(page);
 
+               iov_iter_mapping(&req->iter, READ, page->mapping,
+                                (loff_t)page->index << PAGE_SHIFT, PAGE_SIZE);
+
                /* read the contents of the file from the server into the
                 * page */
                ret = afs_fetch_data(vnode, key, req);
@@ -331,11 +402,6 @@ int afs_page_filler(void *data, struct page *page)
                                ret = -ESTALE;
                        }
 
-#ifdef CONFIG_AFS_FSCACHE
-                       fscache_uncache_page(vnode->cache, page);
-#endif
-                       BUG_ON(PageFsCache(page));
-
                        if (ret == -EINTR ||
                            ret == -ENOMEM ||
                            ret == -ERESTARTSYS ||
@@ -344,10 +410,11 @@ int afs_page_filler(void *data, struct page *page)
                        goto io_error;
                }
 
+               afs_clear_after_read(vnode, req, false);
                SetPageUptodate(page);
 
                /* send the page to the cache */
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
                if (PageFsCache(page) &&
                    fscache_write_page(vnode->cache, page, vnode->status.size,
                                       GFP_KERNEL) != 0) {
@@ -398,31 +465,39 @@ static int afs_readpage(struct file *file, struct page *page)
        return ret;
 }
 
+#if 0
 /*
- * Make pages available as they're filled.
+ * Allow writing to a page to take place.  This function may not sleep.
  */
-static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req)
+static void afs_clear_page_fscache_mark(const struct iov_iter *iter,
+                                       struct page *page)
 {
-#ifdef CONFIG_AFS_FSCACHE
-       struct afs_vnode *vnode = call->reply[0];
-#endif
-       struct page *page = req->pages[req->index];
+       ClearPageFsCache(page);
+}
 
-       req->pages[req->index] = NULL;
-       SetPageUptodate(page);
+static void afs_fscache_write_done(struct fscache_cookie *cookie,
+                                  struct iov_iter *iter)
+{
+       struct afs_read *req = container_of(iter, struct afs_read, iter);
+
+       afs_put_read(req);
+}
+
+/*
+ * Write the read data to the cache.
+ */
+static void afs_readpages_write_to_cache(struct afs_read *req)
+{
+       struct afs_vnode *vnode = AFS_FS_I(req->iter.mapping->host);
 
-       /* send the page to the cache */
-#ifdef CONFIG_AFS_FSCACHE
-       if (PageFsCache(page) &&
-           fscache_write_page(vnode->cache, page, vnode->status.size,
-                              GFP_KERNEL) != 0) {
-               fscache_uncache_page(vnode->cache, page);
-               BUG_ON(PageFsCache(page));
+       if (afs_vnode_cache(vnode)) {
+               req->iter.page_done = afs_clear_page_fscache_mark;
+               fscache_write(vnode->cache, &req->iter, req->pos,
+                             req->file_size, GFP_KERNEL,
+                             afs_fscache_write_done);
        }
-#endif
-       unlock_page(page);
-       put_page(page);
 }
+#endif
 
 /*
  * Read a contiguous set of pages.
@@ -436,7 +511,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
        struct page *first, *page;
        struct key *key = afs_file_key(file);
        pgoff_t index;
-       int ret, n, i;
+       int ret, n;
 
        /* Count the number of contiguous pages at the front of the list.  Note
         * that the list goes prev-wards rather than next-wards.
@@ -452,20 +527,17 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
                n++;
        }
 
-       req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *) * n,
-                     GFP_NOFS);
+       req = kzalloc(sizeof(struct afs_read), GFP_NOFS);
        if (!req)
                return -ENOMEM;
 
        refcount_set(&req->usage, 1);
-       req->page_done = afs_readpages_page_done;
+       req->cleanup = afs_file_read_cleanup;
        req->pos = first->index;
        req->pos <<= PAGE_SHIFT;
-       req->pages = req->array;
 
-       /* Transfer the pages to the request.  We add them in until one fails
-        * to add to the LRU and then we stop (as that'll make a hole in the
-        * contiguous run.
+       /* Add pages to the LRU until it fails.  We keep the pages ref'd and
+        * locked until the read is complete.
         *
         * Note that it's possible for the file size to change whilst we're
         * doing this, but we rely on the server returning less than we asked
@@ -478,15 +550,11 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
                index = page->index;
                if (add_to_page_cache_lru(page, mapping, index,
                                          readahead_gfp_mask(mapping))) {
-#ifdef CONFIG_AFS_FSCACHE
-                       fscache_uncache_page(vnode->cache, page);
-#endif
                        put_page(page);
                        break;
                }
 
-               req->pages[req->nr_pages++] = page;
-               req->len += PAGE_SIZE;
+               req->nr_pages++;
        } while (req->nr_pages < n);
 
        if (req->nr_pages == 0) {
@@ -494,33 +562,26 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
                return 0;
        }
 
+       req->len = req->nr_pages * PAGE_SIZE;
+       iov_iter_mapping(&req->iter, READ, file->f_mapping, req->pos, req->len);
+       req->iter.page_done = afs_readpages_page_done;
+
        ret = afs_fetch_data(vnode, key, req);
        if (ret < 0)
                goto error;
 
-       task_io_account_read(PAGE_SIZE * req->nr_pages);
-       afs_put_read(req);
+       afs_clear_after_read(vnode, req, true);
+       task_io_account_read(req->len);
        return 0;
 
 error:
        if (ret == -ENOENT) {
-               _debug("got NOENT from server"
-                      " - marking file deleted and stale");
+               _debug("got NOENT from server - marking file deleted and stale");
                set_bit(AFS_VNODE_DELETED, &vnode->flags);
                ret = -ESTALE;
        }
 
-       for (i = 0; i < req->nr_pages; i++) {
-               page = req->pages[i];
-               if (page) {
-#ifdef CONFIG_AFS_FSCACHE
-                       fscache_uncache_page(vnode->cache, page);
-#endif
-                       SetPageError(page);
-                       unlock_page(page);
-               }
-       }
-
+       req->error = true;
        afs_put_read(req);
        return ret;
 }
@@ -547,7 +608,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
        }
 
        /* attempt to read as many of the pages as possible */
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
        ret = fscache_read_or_alloc_pages(vnode->cache,
                                          mapping,
                                          pages,
@@ -605,7 +666,7 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
 
        /* we clean up only if the entire page is being invalidated */
        if (offset == 0 && length == PAGE_SIZE) {
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
                if (PageFsCache(page)) {
                        struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
                        fscache_wait_on_page_write(vnode->cache, page);
@@ -640,7 +701,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 
        /* deny if page is being written to the cache and the caller hasn't
         * elected to wait */
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
        if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) {
                _leave(" = F [cache busy]");
                return 0;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index d9a5815945dc..f0cef8e7b1af 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -20,12 +20,6 @@
 
 static const struct afs_fid afs_zero_fid;
 
-/*
- * We need somewhere to discard into in case the server helpfully returns more
- * than we asked for in FS.FetchData{,64}.
- */
-static u8 afs_discard_buffer[64];
-
 static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
 {
        call->cbi = afs_get_cb_interest(cbi);
@@ -468,115 +462,82 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
        struct afs_vnode *vnode = call->reply[0];
        struct afs_read *req = call->reply[2];
        const __be32 *bp;
-       unsigned int size;
-       void *buffer;
        int ret;
 
-       _enter("{%u,%zu/%u;%llu/%llu}",
-              call->unmarshall, call->offset, call->count,
-              req->remain, req->actual_len);
+       _enter("{%u,%zu/%llu}",
+              call->unmarshall, iov_iter_count(&call->iter), req->actual_len);
 
        switch (call->unmarshall) {
        case 0:
                req->actual_len = 0;
-               call->offset = 0;
                call->unmarshall++;
                if (call->operation_ID != FSFETCHDATA64) {
                        call->unmarshall++;
                        goto no_msw;
                }
+               afs_extract_to_tmp(call);
 
                /* extract the upper part of the returned data length of an
-                * FSFETCHDATA64 op (which should always be 0 using this
-                * client) */
+                * FSFETCHDATA64 op.
+                */
        case 1:
                _debug("extract data length (MSW)");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                req->actual_len = ntohl(call->tmp);
                req->actual_len <<= 32;
-               call->offset = 0;
                call->unmarshall++;
-
        no_msw:
+               afs_extract_to_tmp(call);
+
                /* extract the returned data length */
        case 2:
                _debug("extract data length");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                req->actual_len |= ntohl(call->tmp);
                _debug("DATA length: %llu", req->actual_len);
 
-               req->remain = req->actual_len;
-               call->offset = req->pos & (PAGE_SIZE - 1);
-               req->index = 0;
                if (req->actual_len == 0)
                        goto no_more_data;
                call->unmarshall++;
-
-       begin_page:
-               ASSERTCMP(req->index, <, req->nr_pages);
-               if (req->remain > PAGE_SIZE - call->offset)
-                       size = PAGE_SIZE - call->offset;
-               else
-                       size = req->remain;
-               call->count = call->offset + size;
-               ASSERTCMP(call->count, <=, PAGE_SIZE);
-               req->remain -= size;
+               call->_iter = &req->iter;
+               iov_iter_truncate(&req->iter, req->actual_len);
 
                /* extract the returned data */
        case 3:
-               _debug("extract data %llu/%llu %zu/%u",
-                      req->remain, req->actual_len, call->offset, call->count);
+               _debug("extract data %zu/%llu",
+                      iov_iter_count(&call->iter), req->actual_len);
 
-               buffer = kmap(req->pages[req->index]);
-               ret = afs_extract_data(call, buffer, call->count, true);
-               kunmap(req->pages[req->index]);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
-               if (call->offset == PAGE_SIZE) {
-                       if (req->page_done)
-                               req->page_done(call, req);
-                       req->index++;
-                       if (req->remain > 0) {
-                               call->offset = 0;
-                               if (req->index >= req->nr_pages) {
-                                       call->unmarshall = 4;
-                                       goto begin_discard;
-                               }
-                               goto begin_page;
-                       }
-               }
-               goto no_more_data;
+
+               call->_iter = &call->iter;
+               if (req->actual_len <= req->len)
+                       goto no_more_data;
 
                /* Discard any excess data the server gave us */
-       begin_discard:
+               iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
        case 4:
-               size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
-               call->count = size;
-               _debug("extract discard %llu/%llu %zu/%u",
-                      req->remain, req->actual_len, call->offset, call->count);
-
-               call->offset = 0;
-               ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
-               req->remain -= call->offset;
+               _debug("extract discard %zu/%llu",
+                      iov_iter_count(&call->iter), req->actual_len - req->len);
+
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
-               if (req->remain > 0)
-                       goto begin_discard;
 
        no_more_data:
-               call->offset = 0;
                call->unmarshall = 5;
+               afs_extract_to_buf(call, (21 + 3 + 6) * 4);
 
                /* extract the metadata */
        case 5:
-               ret = afs_extract_data(call, call->buffer,
-                                      (21 + 3 + 6) * 4, false);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
@@ -589,22 +550,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
                if (call->reply[1])
                        xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 6:
                break;
        }
 
-       for (; req->index < req->nr_pages; req->index++) {
-               if (call->count < PAGE_SIZE)
-                       zero_user_segment(req->pages[req->index],
-                                         call->count, PAGE_SIZE);
-               if (req->page_done)
-                       req->page_done(call, req);
-               call->count = 0;
-       }
-
        _leave(" = 0 [done]");
        return 0;
 }
@@ -700,6 +651,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
        call->reply[1] = NULL; /* volsync */
        call->reply[2] = req;
        call->expected_version = vnode->status.data_version;
+       req->call_debug_id = call->debug_id;
 
        /* marshall the parameters */
        bp = call->request;
@@ -1598,31 +1550,31 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 {
        const __be32 *bp;
        char *p;
+       u32 size;
        int ret;
 
        _enter("{%u}", call->unmarshall);
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
                call->unmarshall++;
+               afs_extract_to_buf(call, 12 * 4);
 
                /* extract the returned status record */
        case 1:
                _debug("extract status");
-               ret = afs_extract_data(call, call->buffer,
-                                      12 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
                xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
-               call->offset = 0;
                call->unmarshall++;
+               afs_extract_to_tmp(call);
 
                /* extract the volume name length */
        case 2:
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -1631,46 +1583,26 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
                if (call->count >= AFSNAMEMAX)
                        return afs_protocol_error(call, -EBADMSG,
                                                  afs_eproto_volname_len);
-               call->offset = 0;
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
                call->unmarshall++;
 
                /* extract the volume name */
        case 3:
                _debug("extract volname");
-               if (call->count > 0) {
-                       ret = afs_extract_data(call, call->reply[2],
-                                              call->count, true);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
 
                p = call->reply[2];
                p[call->count] = 0;
                _debug("volname '%s'", p);
-
-               call->offset = 0;
-               call->unmarshall++;
-
-               /* extract the volume name padding */
-               if ((call->count & 3) == 0) {
-                       call->unmarshall++;
-                       goto no_volname_padding;
-               }
-               call->count = 4 - (call->count & 3);
-
-       case 4:
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count, true);
-               if (ret < 0)
-                       return ret;
-
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
-       no_volname_padding:
 
                /* extract the offline message length */
-       case 5:
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+       case 4:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -1679,46 +1611,27 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
                if (call->count >= AFSNAMEMAX)
                        return afs_protocol_error(call, -EBADMSG,
                                                  afs_eproto_offline_msg_len);
-               call->offset = 0;
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
                call->unmarshall++;
 
                /* extract the offline message */
-       case 6:
+       case 5:
                _debug("extract offline");
-               if (call->count > 0) {
-                       ret = afs_extract_data(call, call->reply[2],
-                                              call->count, true);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
 
                p = call->reply[2];
                p[call->count] = 0;
                _debug("offline '%s'", p);
 
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
-               /* extract the offline message padding */
-               if ((call->count & 3) == 0) {
-                       call->unmarshall++;
-                       goto no_offline_padding;
-               }
-               call->count = 4 - (call->count & 3);
-
-       case 7:
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count, true);
-               if (ret < 0)
-                       return ret;
-
-               call->offset = 0;
-               call->unmarshall++;
-       no_offline_padding:
-
                /* extract the message of the day length */
-       case 8:
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+       case 6:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -1727,38 +1640,24 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
                if (call->count >= AFSNAMEMAX)
                        return afs_protocol_error(call, -EBADMSG,
                                                  afs_eproto_motd_len);
-               call->offset = 0;
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
                call->unmarshall++;
 
                /* extract the message of the day */
-       case 9:
+       case 7:
                _debug("extract motd");
-               if (call->count > 0) {
-                       ret = afs_extract_data(call, call->reply[2],
-                                              call->count, true);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = afs_extract_data(call, false);
+               if (ret < 0)
+                       return ret;
 
                p = call->reply[2];
                p[call->count] = 0;
                _debug("motd '%s'", p);
 
-               call->offset = 0;
                call->unmarshall++;
 
-               /* extract the message of the day padding */
-               call->count = (4 - (call->count & 3)) & 3;
-
-       case 10:
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count, false);
-               if (ret < 0)
-                       return ret;
-
-               call->offset = 0;
-               call->unmarshall++;
-       case 11:
+       case 8:
                break;
        }
 
@@ -2024,19 +1923,16 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
        u32 count;
        int ret;
 
-       _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+       _enter("{%u,%zu}", call->unmarshall, iov_iter_count(&call->iter));
 
-again:
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* Extract the capabilities word count */
        case 1:
-               ret = afs_extract_data(call, &call->tmp,
-                                      1 * sizeof(__be32),
-                                      true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -2044,24 +1940,17 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
 
                call->count = count;
                call->count2 = count;
-               call->offset = 0;
+               iov_iter_discard(&call->iter, READ, count * sizeof(__be32));
                call->unmarshall++;
 
                /* Extract capabilities words */
        case 2:
-               count = min(call->count, 16U);
-               ret = afs_extract_data(call, call->buffer,
-                                      count * sizeof(__be32),
-                                      call->count > 16);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
                /* TODO: Examine capabilities */
 
-               call->count -= count;
-               if (call->count > 0)
-                       goto again;
-               call->offset = 0;
                call->unmarshall++;
                break;
        }
@@ -2215,13 +2104,13 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* Extract the file status count and array in two steps */
        case 1:
                _debug("extract status count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -2234,11 +2123,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
                call->count = 0;
                call->unmarshall++;
        more_counts:
-               call->offset = 0;
+               afs_extract_to_buf(call, 21 * sizeof(__be32));
 
        case 2:
                _debug("extract status array %u", call->count);
-               ret = afs_extract_data(call, call->buffer, 21 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -2256,12 +2145,12 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
                call->count = 0;
                call->unmarshall++;
-               call->offset = 0;
+               afs_extract_to_tmp(call);
 
                /* Extract the callback count and array in two steps */
        case 3:
                _debug("extract CB count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -2273,11 +2162,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
                call->count = 0;
                call->unmarshall++;
        more_cbs:
-               call->offset = 0;
+               afs_extract_to_buf(call, 3 * sizeof(__be32));
 
        case 4:
                _debug("extract CB array");
-               ret = afs_extract_data(call, call->buffer, 3 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -2294,11 +2183,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
                if (call->count < call->count2)
                        goto more_cbs;
 
-               call->offset = 0;
+               afs_extract_to_buf(call, 6 * sizeof(__be32));
                call->unmarshall++;
 
        case 5:
-               ret = afs_extract_data(call, call->buffer, 6 * 4, false);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
@@ -2306,7 +2195,6 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
                if (call->reply[3])
                        xdr_decode_AFSVolSync(&bp, call->reply[3]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 6:
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 457a8f76b6a2..997ab8350dfe 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -96,11 +96,16 @@ struct afs_call {
        struct afs_cb_interest  *cbi;           /* Callback interest for server used */
        void                    *request;       /* request data (first part) */
        struct address_space    *mapping;       /* Pages being written from */
+       struct iov_iter         iter;           /* Buffer iterator */
+       struct iov_iter         *_iter;         /* Iterator currently in use */
+       union { /* Convenience for ->iter */
+               struct kvec     kvec[1];
+               struct bio_vec  bvec[1];
+       };
        void                    *buffer;        /* reply receive buffer */
        void                    *reply[4];      /* Where to put the reply */
        pgoff_t                 first;          /* first page in mapping to deal with */
        pgoff_t                 last;           /* last page in mapping to deal with */
-       size_t                  offset;         /* offset into received data store */
        atomic_t                usage;
        enum afs_call_state     state;
        spinlock_t              state_lock;
@@ -177,15 +182,15 @@ struct afs_read {
        loff_t                  pos;            /* Where to start reading */
        loff_t                  len;            /* How much we're asking for */
        loff_t                  actual_len;     /* How much we're actually getting */
-       loff_t                  remain;         /* Amount remaining */
        loff_t                  file_size;      /* File size returned by server */
        afs_dataversion_t       data_version;   /* Version number returned by server */
        refcount_t              usage;
-       unsigned int            index;          /* Which page we're reading into */
        unsigned int            nr_pages;
-       void (*page_done)(struct afs_call *, struct afs_read *);
-       struct page             **pages;
-       struct page             *array[];
+       unsigned int            done_pages;
+       bool                    error;
+       unsigned int            call_debug_id;
+       void (*cleanup)(struct afs_read *);
+       struct iov_iter         iter;           /* Buffer */
 };
 
 /*
@@ -548,6 +553,15 @@ struct afs_vnode {
        afs_callback_type_t     cb_type;        /* type of callback */
 };
 
+static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+       return vnode->cache;
+#else
+       return NULL;
+#endif
+}
+
 /*
  * cached security record for one user's attempt to access a vnode
  */
@@ -928,12 +942,34 @@ extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 extern void afs_flat_call_destructor(struct afs_call *);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
+extern int afs_extract_data(struct afs_call *, bool);
 extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause);
 
+static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
+{
+       call->kvec[0].iov_base = buf;
+       call->kvec[0].iov_len = size;
+       iov_iter_kvec(&call->iter, READ, call->kvec, 1, size);
+}
+
+static inline void afs_extract_to_tmp(struct afs_call *call)
+{
+       afs_extract_begin(call, &call->tmp, sizeof(call->tmp));
+}
+
+static inline void afs_extract_discard(struct afs_call *call, size_t size)
+{
+       iov_iter_discard(&call->iter, READ, size);
+}
+
+static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
+{
+       afs_extract_begin(call, call->buffer, size);
+}
+
 static inline int afs_transfer_reply(struct afs_call *call)
 {
-       return afs_extract_data(call, call->buffer, call->reply_max, false);
+       return afs_extract_data(call, false);
 }
 
 static inline bool afs_check_call_state(struct afs_call *call,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 20199f2b2c31..966e30f30cbb 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -143,6 +143,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
        INIT_WORK(&call->async_work, afs_process_async_call);
        init_waitqueue_head(&call->waitq);
        spin_lock_init(&call->state_lock);
+       call->_iter = &call->iter;
 
        o = atomic_inc_return(&net->nr_outstanding_calls);
        trace_afs_call(call, afs_call_trace_alloc, 1, o,
@@ -233,6 +234,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net,
                        goto nomem_free;
        }
 
+       afs_extract_to_buf(call, call->reply_max);
        call->operation_ID = type->op;
        init_waitqueue_head(&call->waitq);
        return call;
@@ -465,14 +467,12 @@ static void afs_deliver_to_call(struct afs_call *call)
               state == AFS_CALL_SV_AWAIT_ACK
               ) {
                if (state == AFS_CALL_SV_AWAIT_ACK) {
-                       struct iov_iter iter;
-
-                       iov_iter_kvec(&iter, READ, NULL, 0, 0);
+                       iov_iter_kvec(&call->iter, READ, NULL, 0, 0);
                        ret = rxrpc_kernel_recv_data(call->net->socket,
-                                                    call->rxcall, &iter, false,
-                                                    &remote_abort,
+                                                    call->rxcall, &call->iter,
+                                                    false, &remote_abort,
                                                     &call->service_id);
-                       trace_afs_recv_data(call, 0, 0, false, ret);
+                       trace_afs_receive_data(call, &call->iter, false, ret);
 
                        if (ret == -EINPROGRESS || ret == -EAGAIN)
                                return;
@@ -516,7 +516,7 @@ static void afs_deliver_to_call(struct afs_call *call)
                        if (state != AFS_CALL_CL_AWAIT_REPLY)
                                abort_code = RXGEN_SS_UNMARSHAL;
                        rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-                                               abort_code, -EBADMSG, "KUM");
+                                               abort_code, ret, "KUM");
                        goto local_abort;
                }
        }
@@ -729,6 +729,7 @@ void afs_charge_preallocation(struct work_struct *work)
                        call->async = true;
                        call->state = AFS_CALL_SV_AWAIT_OP_ID;
                        init_waitqueue_head(&call->waitq);
+                       afs_extract_to_tmp(call);
                }
 
                if (rxrpc_kernel_charge_accept(net->socket,
@@ -774,18 +775,15 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
 {
        int ret;
 
-       _enter("{%zu}", call->offset);
-
-       ASSERTCMP(call->offset, <, 4);
+       _enter("{%zu}", iov_iter_count(call->_iter));
 
        /* the operation ID forms the first four bytes of the request data */
-       ret = afs_extract_data(call, &call->tmp, 4, true);
+       ret = afs_extract_data(call, true);
        if (ret < 0)
                return ret;
 
        call->operation_ID = ntohl(call->tmp);
        afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
-       call->offset = 0;
 
        /* ask the cache manager to route the call (it'll change the call type
         * if successful) */
@@ -889,30 +887,19 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 /*
  * Extract a piece of data from the received data socket buffers.
  */
-int afs_extract_data(struct afs_call *call, void *buf, size_t count,
-                    bool want_more)
+int afs_extract_data(struct afs_call *call, bool want_more)
 {
        struct afs_net *net = call->net;
-       struct iov_iter iter;
-       struct kvec iov;
+       struct iov_iter *iter = call->_iter;
        enum afs_call_state state;
        u32 remote_abort = 0;
        int ret;
 
-       _enter("{%s,%zu},,%zu,%d",
-              call->type->name, call->offset, count, want_more);
-
-       ASSERTCMP(call->offset, <=, count);
-
-       iov.iov_base = buf + call->offset;
-       iov.iov_len = count - call->offset;
-       iov_iter_kvec(&iter, READ, &iov, 1, count - call->offset);
+       _enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more);
 
-       ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
+       ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
                                     want_more, &remote_abort,
                                     &call->service_id);
-       call->offset += (count - call->offset) - iov_iter_count(&iter);
-       trace_afs_recv_data(call, count, call->offset, want_more, ret);
        if (ret == 0 || ret == -EAGAIN)
                return ret;
 
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index d0f95c4ab05e..e18c51742daa 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -187,19 +187,18 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
        u32 uniquifier, nentries, count;
        int i, ret;
 
-       _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+       _enter("{%u,%zu/%u}",
+              call->unmarshall, iov_iter_count(call->_iter), call->count);
 
-again:
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_buf(call,
+                                  sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
                call->unmarshall++;
 
                /* Extract the returned uuid, uniquifier, nentries and blkaddrs size */
        case 1:
-               ret = afs_extract_data(call, call->buffer,
-                                      sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32),
-                                      true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -216,28 +215,28 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
                call->reply[0] = alist;
                call->count = count;
                call->count2 = nentries;
-               call->offset = 0;
                call->unmarshall++;
 
+       more_entries:
+               count = min(call->count, 4U);
+               afs_extract_to_buf(call, count * sizeof(__be32));
+
                /* Extract entries */
        case 2:
-               count = min(call->count, 4U);
-               ret = afs_extract_data(call, call->buffer,
-                                      count * sizeof(__be32),
-                                      call->count > 4);
+               ret = afs_extract_data(call, call->count > 4);
                if (ret < 0)
                        return ret;
 
                alist = call->reply[0];
                bp = call->buffer;
+               count = min(call->count, 4U);
                for (i = 0; i < count; i++)
                        if (alist->nr_addrs < call->count2)
                                afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
 
                call->count -= count;
                if (call->count > 0)
-                       goto again;
-               call->offset = 0;
+                       goto more_entries;
                call->unmarshall++;
                break;
        }
@@ -318,44 +317,35 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
        u32 count;
        int ret;
 
-       _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+       _enter("{%u,%zu/%u}",
+              call->unmarshall, iov_iter_count(call->_iter), call->count);
 
-again:
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* Extract the capabilities word count */
        case 1:
-               ret = afs_extract_data(call, &call->tmp,
-                                      1 * sizeof(__be32),
-                                      true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                count = ntohl(call->tmp);
-
                call->count = count;
                call->count2 = count;
-               call->offset = 0;
+
                call->unmarshall++;
+               afs_extract_discard(call, count * sizeof(__be32));
 
                /* Extract capabilities words */
        case 2:
-               count = min(call->count, 16U);
-               ret = afs_extract_data(call, call->buffer,
-                                      count * sizeof(__be32),
-                                      call->count > 16);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
                /* TODO: Examine capabilities */
 
-               call->count -= count;
-               if (call->count > 0)
-                       goto again;
-               call->offset = 0;
                call->unmarshall++;
                break;
        }
@@ -426,22 +416,19 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
        u32 uniquifier, size;
        int ret;
 
-       _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);
+       _enter("{%u,%zu,%u}",
+              call->unmarshall, iov_iter_count(call->_iter), call->count2);
 
-again:
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32));
                call->unmarshall = 1;
 
                /* Extract the returned uuid, uniquifier, fsEndpoints count and
                 * either the first fsEndpoint type or the volEndpoints
                 * count if there are no fsEndpoints. */
        case 1:
-               ret = afs_extract_data(call, call->buffer,
-                                      sizeof(uuid_t) +
-                                      3 * sizeof(__be32),
-                                      true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -459,15 +446,11 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
                        return -ENOMEM;
                alist->version = uniquifier;
                call->reply[0] = alist;
-               call->offset = 0;
 
                if (call->count == 0)
                        goto extract_volendpoints;
 
-               call->unmarshall = 2;
-
-               /* Extract fsEndpoints[] entries */
-       case 2:
+       next_fsendpoint:
                switch (call->count2) {
                case YFS_ENDPOINT_IPV4:
                        size = sizeof(__be32) * (1 + 1 + 1);
@@ -481,7 +464,12 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
                }
 
                size += sizeof(__be32);
-               ret = afs_extract_data(call, call->buffer, size, true);
+               afs_extract_to_buf(call, size);
+               call->unmarshall = 2;
+
+               /* Extract fsEndpoints[] entries */
+       case 2:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -512,10 +500,9 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
                 */
                call->count2 = ntohl(*bp++);
 
-               call->offset = 0;
                call->count--;
                if (call->count > 0)
-                       goto again;
+                       goto next_fsendpoint;
 
        extract_volendpoints:
                /* Extract the list of volEndpoints. */
@@ -526,6 +513,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
                        return afs_protocol_error(call, -EBADMSG,
                                                  afs_eproto_yvl_vlendpt_type);
 
+               afs_extract_to_buf(call, 1 * sizeof(__be32));
                call->unmarshall = 3;
 
                /* Extract the type of volEndpoints[0].  Normally we would
@@ -533,17 +521,14 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
                 * data of the current one, but this is the first...
                 */
        case 3:
-               ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
-               call->count2 = ntohl(*bp++);
-               call->offset = 0;
-               call->unmarshall = 4;
 
-               /* Extract volEndpoints[] entries */
-       case 4:
+       next_volendpoint:
+               call->count2 = ntohl(*bp++);
                switch (call->count2) {
                case YFS_ENDPOINT_IPV4:
                        size = sizeof(__be32) * (1 + 1 + 1);
@@ -557,8 +542,13 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
                }
 
                if (call->count > 1)
-                       size += sizeof(__be32);
-               ret = afs_extract_data(call, call->buffer, size, true);
+                       size += sizeof(__be32); /* Get next type too */
+               afs_extract_to_buf(call, size);
+               call->unmarshall = 4;
+
+               /* Extract volEndpoints[] entries */
+       case 4:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -584,19 +574,17 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
                /* Got either the type of the next entry or the count of
                 * volEndpoints if no more fsEndpoints.
                 */
-               call->offset = 0;
                call->count--;
-               if (call->count > 0) {
-                       call->count2 = ntohl(*bp++);
-                       goto again;
-               }
+               if (call->count > 0)
+                       goto next_volendpoint;
 
        end:
+               afs_extract_discard(call, 0);
                call->unmarshall = 5;
 
                /* Done */
        case 5:
-               ret = afs_extract_data(call, call->buffer, 0, false);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
                call->unmarshall = 6;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 19c04caf3c01..d07e7f29f50a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -37,8 +37,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 
        _enter(",,%llu", (unsigned long long)pos);
 
-       req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
-                     GFP_KERNEL);
+       req = kzalloc(sizeof(struct afs_read), GFP_KERNEL);
        if (!req)
                return -ENOMEM;
 
@@ -46,9 +45,8 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
        req->pos = pos;
        req->len = len;
        req->nr_pages = 1;
-       req->pages = req->array;
-       req->pages[0] = page;
-       get_page(page);
+       iov_iter_mapping(&req->iter, READ, vnode->vfs_inode.i_mapping,
+                        pos, len);
 
        ret = afs_fetch_data(vnode, key, req);
        afs_put_read(req);
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 84b90a79d75a..d99294c75e9a 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -218,6 +218,9 @@ extern int __fscache_read_or_alloc_pages(struct fscache_cookie *,
                                         gfp_t);
 extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t);
 extern int __fscache_write_page(struct fscache_cookie *, struct page *, loff_t, gfp_t);
+extern int __fscache_write(struct fscache_cookie *, struct iov_iter *,
+                          loff_t, loff_t, gfp_t,
+                          void (*)(struct fscache_cookie *, struct iov_iter *));
 extern void __fscache_uncache_page(struct fscache_cookie *, struct page *);
 extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *);
 extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *);
@@ -655,6 +658,34 @@ void fscache_readpages_cancel(struct fscache_cookie *cookie,
                __fscache_readpages_cancel(cookie, pages);
 }
 
+/**
+ * fscache_write - Request storage of data in the cache
+ * @cookie: The cookie representing the cache object
+ * @iter: The data to store
+ * @pos: The position in the cached data
+ * @object_size: Updated size of object
+ * @gfp: The conditions under which memory allocation should be made
+ * @done: Called upon operation completion
+ *
+ * Request the data described by the iterator be written into the cache.  This
+ * request may be ignored if insufficient space exists in the cache, in which
+ * case -ENOBUFS will be returned.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+int fscache_write(struct fscache_cookie *cookie, struct iov_iter *iter,
+                 loff_t pos, loff_t object_size, gfp_t gfp,
+                 void (*done)(struct fscache_cookie *cookie,
+                              struct iov_iter *iter))
+{
+       if (fscache_cookie_valid(cookie))
+               return __fscache_write(cookie, iter, pos, object_size, gfp, done);
+       else
+               return -ENOBUFS;
+}
+
 /**
  * fscache_write_page - Request storage of a page in the cache
  * @cookie: The cookie representing the cache object
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 5c60ade2c7d8..5e0f8dcede26 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -207,17 +207,16 @@ afs_edit_dir_reasons;
 #define EM(a, b)       { a, b },
 #define E_(a, b)       { a, b }
 
-TRACE_EVENT(afs_recv_data,
-           TP_PROTO(struct afs_call *call, unsigned count, unsigned offset,
+TRACE_EVENT(afs_receive_data,
+           TP_PROTO(struct afs_call *call, struct iov_iter *iter,
                     bool want_more, int ret),
 
-           TP_ARGS(call, count, offset, want_more, ret),
+           TP_ARGS(call, iter, want_more, ret),
 
            TP_STRUCT__entry(
+                   __field(loff_t,                     remain          )
                    __field(unsigned int,               call            )
                    __field(enum afs_call_state,        state           )
-                   __field(unsigned int,               count           )
-                   __field(unsigned int,               offset          )
                    __field(unsigned short,             unmarshall      )
                    __field(bool,                       want_more       )
                    __field(int,                        ret             )
@@ -227,17 +226,18 @@ TRACE_EVENT(afs_recv_data,
                    __entry->call       = call->debug_id;
                    __entry->state      = call->state;
                    __entry->unmarshall = call->unmarshall;
-                   __entry->count      = count;
-                   __entry->offset     = offset;
+                   __entry->remain     = iov_iter_count(iter);
                    __entry->want_more  = want_more;
                    __entry->ret        = ret;
                           ),
 
-           TP_printk("c=%08x s=%u u=%u %u/%u wm=%u ret=%d",
+           TP_printk("c=%08x r=%llu u=%u w=%u s=%u ret=%d",
                      __entry->call,
-                     __entry->state, __entry->unmarshall,
-                     __entry->offset, __entry->count,
-                     __entry->want_more, __entry->ret)
+                     __entry->remain,
+                     __entry->unmarshall,
+                     __entry->want_more,
+                     __entry->state,
+                     __entry->ret)
            );
 
 TRACE_EVENT(afs_notify_call,

Reply via email to