The ll_rw_block() function has been deprecated in favor of BIO, which
appears to come with large performance improvements.

This patch decreases boot time by close to 40% when using squashfs for
the root filesystem. This was observed at least when starting an
Android VM on Chrome OS using crosvm
(https://chromium.googlesource.com/chromiumos/platform/crosvm). The
patch was tested on 4.19 as well as master.

This patch is largely based on Adrien Schildknecht's patch, originally
sent as https://lkml.org/lkml/2017/9/22/814, with some significant
changes and simplifications. It also takes Phillip Lougher's feedback
into account, in particular around preserving support for FILE_CACHE.

Signed-off-by: Philippe Liard <[email protected]>
---
 fs/squashfs/block.c | 377 ++++++++++++++++++++++++++++----------------
 1 file changed, 244 insertions(+), 133 deletions(-)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index f098b9f1c396..5ec7528b9d2f 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -26,12 +26,14 @@
  * datablocks and metadata blocks.
  */
 
+#include <linux/blkdev.h>
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/bio.h>
+#include <linux/pagemap.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -39,45 +41,207 @@
 #include "decompressor.h"
 #include "page_actor.h"
 
+struct squashfs_bio_request {
+       struct buffer_head **bh;
+       int bh_start;
+       int bh_len;
+};
+
 /*
- * Read the metadata block length, this is stored in the first two
- * bytes of the metadata block.
+ * Returns the number of bytes copied to the page actor, or a negative
+ * error code on failure.
  */
-static struct buffer_head *get_block_length(struct super_block *sb,
-                       u64 *cur_index, int *offset, int *length)
+static int squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers,
+                               struct squashfs_page_actor *actor,
+                               int blk_offset, int req_length, int blk_size)
 {
-       struct squashfs_sb_info *msblk = sb->s_fs_info;
-       struct buffer_head *bh;
+       int bytes_to_copy, copied_bytes = 0;
+       int actor_offset = 0, bh_offset = 0;
+       const int input_size = nr_buffers * blk_size;
+       const int output_capacity = actor->pages * PAGE_SIZE;
+       void *actor_addr = squashfs_first_page(actor);
+
+       if (blk_offset + req_length > input_size ||
+           req_length > output_capacity)
+               return -EIO;
+
+       while (copied_bytes < req_length) {
+               bytes_to_copy = min_t(int, blk_size - blk_offset,
+                                     PAGE_SIZE - actor_offset);
+               bytes_to_copy = min_t(int, bytes_to_copy,
+                                     req_length - copied_bytes);
+               memcpy(actor_addr + actor_offset,
+                      bh[bh_offset]->b_data + blk_offset, bytes_to_copy);
+
+               actor_offset += bytes_to_copy;
+               copied_bytes += bytes_to_copy;
+               blk_offset += bytes_to_copy;
+
+               if (actor_offset >= PAGE_SIZE) {
+                       actor_offset = 0;
+                       actor_addr = squashfs_next_page(actor);
+               }
+               if (blk_offset >= blk_size) {
+                       blk_offset = 0;
+                       ++bh_offset;
+               }
+       }
+       squashfs_finish_page(actor);
+       return copied_bytes;
+}
+
+static void squashfs_bio_end_io(struct bio *bio)
+{
+       struct squashfs_bio_request *bio_req = bio->bi_private;
+       blk_status_t error = bio->bi_status;
+       int i;
+
+       bio_put(bio);
+
+       for (i = bio_req->bh_start; i < bio_req->bh_start + bio_req->bh_len;
+            ++i) {
+               if (error)
+                       clear_buffer_uptodate(bio_req->bh[i]);
+               else
+                       set_buffer_uptodate(bio_req->bh[i]);
+               unlock_buffer(bio_req->bh[i]);
+       }
+       kfree(bio_req);
+}
+
+static void put_bh_array(struct buffer_head **bh, int start, int len)
+{
+       int i;
+
+       for (i = start; i < start + len; ++i)
+               put_bh(bh[i]);
+}
 
-       bh = sb_bread(sb, *cur_index);
-       if (bh == NULL)
+static struct buffer_head **
+create_buffer_head_array(struct super_block *sb, int nr_buffers, u64 block)
+{
+       int i;
+       struct buffer_head **bh;
+
+       bh = kmalloc_array(nr_buffers, sizeof(*bh), GFP_NOIO);
+       if (!bh)
                return NULL;
 
-       if (msblk->devblksize - *offset == 1) {
-               *length = (unsigned char) bh->b_data[*offset];
-               put_bh(bh);
-               bh = sb_bread(sb, ++(*cur_index));
-               if (bh == NULL)
+       for (i = 0; i < nr_buffers; ++i) {
+               bh[i] = sb_getblk(sb, block + i);
+               if (!bh[i]) {
+                       put_bh_array(bh, 0, i);
+                       kfree(bh);
                        return NULL;
-               *length |= (unsigned char) bh->b_data[0] << 8;
-               *offset = 1;
-       } else {
-               *length = (unsigned char) bh->b_data[*offset] |
-                       (unsigned char) bh->b_data[*offset + 1] << 8;
-               *offset += 2;
-
-               if (*offset == msblk->devblksize) {
-                       put_bh(bh);
-                       bh = sb_bread(sb, ++(*cur_index));
-                       if (bh == NULL)
-                               return NULL;
-                       *offset = 0;
                }
        }
-
        return bh;
 }
 
+static void free_bh_array(struct buffer_head **bh, int nr_buffers)
+{
+       if (bh) {
+               put_bh_array(bh, 0, nr_buffers);
+               kfree(bh);
+       }
+}
+
+/*
+ * Returns 0 on success and fills bh_ptr, nr_buffers and block_offset. An error
+ * is otherwise returned as a negative number. Note that the caller must free
+ * *bh_ptr on success.
+ */
+static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
+                            struct buffer_head ***bh_ptr, int *nr_buffers,
+                            int *block_offset)
+{
+       struct bio *bio = NULL;
+       struct buffer_head *bh, **bh_array;
+       struct squashfs_bio_request *bio_req = NULL;
+       int i, prev_block = 0;
+
+       struct squashfs_sb_info *msblk = sb->s_fs_info;
+       const u64 read_start = round_down(index, msblk->devblksize);
+       const sector_t block = read_start >> msblk->devblksize_log2;
+
+       const u64 read_end = round_up(index + length, msblk->devblksize);
+       const sector_t block_end = read_end >> msblk->devblksize_log2;
+
+       const int blksz = msblk->devblksize;
+       const int bio_max_pages = min_t(int, block_end - block, BIO_MAX_PAGES);
+       int offset = read_start - round_down(index, PAGE_SIZE);
+       int res;
+
+       *block_offset = index & ((1 << msblk->devblksize_log2) - 1);
+       *nr_buffers = block_end - block;
+       bh_array = create_buffer_head_array(sb, *nr_buffers, block);
+       *bh_ptr = bh_array;
+       if (!bh_array)
+               return -ENOMEM;
+
+       /* Create and submit the BIOs */
+       for (i = 0; i < *nr_buffers; ++i, offset += blksz) {
+               bh = bh_array[i];
+               lock_buffer(bh);
+               if (buffer_uptodate(bh)) {
+                       unlock_buffer(bh);
+                       continue;
+               }
+               offset %= PAGE_SIZE;
+
+               /* Append the buffer to the current BIO if it is contiguous */
+               if (bio && bio_req && prev_block + 1 == i) {
+                       if (bio_add_page(bio, bh->b_page, blksz, offset)) {
+                               bio_req->bh_len++;
+                               prev_block = i;
+                               continue;
+                       }
+               }
+
+               /* Otherwise, submit the current BIO and create a new one */
+               if (bio)
+                       submit_bio(bio);
+
+               bio_req = kzalloc(sizeof(struct squashfs_bio_request),
+                                 GFP_NOIO);
+               bio = bio_req ? bio_alloc(GFP_NOIO, bio_max_pages) : NULL;
+               if (!bio) {
+                       kfree(bio_req);
+                       unlock_buffer(bh);
+                       res = -ENOMEM;
+                       goto cleanup;
+               }
+
+               bio_set_dev(bio, sb->s_bdev);
+               bio->bi_iter.bi_sector =
+                       (block + i) * (msblk->devblksize >> SECTOR_SHIFT);
+               bio->bi_private = bio_req;
+               bio->bi_end_io = squashfs_bio_end_io;
+               bio->bi_opf = READ;
+
+               bio_req->bh = bh_array;
+               bio_req->bh_start = i;
+               bio_req->bh_len = 1;
+               bio_add_page(bio, bh->b_page, blksz, offset);
+               prev_block = i;
+       }
+       if (bio)
+               submit_bio(bio);
+
+       res = 0;
+
+cleanup:
+       for (i = 0; i < *nr_buffers; ++i) {
+               wait_on_buffer(bh_array[i]);
+               if (!buffer_uptodate(bh_array[i]) && res == 0)
+                       res = -EIO;
+       }
+       if (res) {
+               free_bh_array(bh_array, *nr_buffers);
+               *bh_ptr = NULL;
+       }
+       return res;
+}
 
 /*
  * Read and decompress a metadata block or datablock.  Length is non-zero
@@ -89,129 +253,76 @@ static struct buffer_head *get_block_length(struct super_block *sb,
  * algorithms).
  */
 int squashfs_read_data(struct super_block *sb, u64 index, int length,
-               u64 *next_index, struct squashfs_page_actor *output)
+                      u64 *next_index, struct squashfs_page_actor *output)
 {
        struct squashfs_sb_info *msblk = sb->s_fs_info;
-       struct buffer_head **bh;
-       int offset = index & ((1 << msblk->devblksize_log2) - 1);
-       u64 cur_index = index >> msblk->devblksize_log2;
-       int bytes, compressed, b = 0, k = 0, avail, i;
-
-       bh = kcalloc(((output->length + msblk->devblksize - 1)
-               >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
-       if (bh == NULL)
-               return -ENOMEM;
+       int res;
+       struct buffer_head **bh = NULL;
+       int nr_buffers;
+       int compressed;
+       int offset;
 
-       if (length) {
-               /*
-                * Datablock.
-                */
-               bytes = -offset;
-               compressed = SQUASHFS_COMPRESSED_BLOCK(length);
-               length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
-               if (next_index)
-                       *next_index = index + length;
-
-               TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
-                       index, compressed ? "" : "un", length, output->length);
-
-               if (length < 0 || length > output->length ||
-                               (index + length) > msblk->bytes_used)
-                       goto read_failure;
-
-               for (b = 0; bytes < length; b++, cur_index++) {
-                       bh[b] = sb_getblk(sb, cur_index);
-                       if (bh[b] == NULL)
-                               goto block_release;
-                       bytes += msblk->devblksize;
-               }
-               ll_rw_block(REQ_OP_READ, 0, b, bh);
-       } else {
+       if (length == 0) {
                /*
                 * Metadata block.
                 */
-               if ((index + 2) > msblk->bytes_used)
-                       goto read_failure;
+               length = 2;
+               if (index + length > msblk->bytes_used) {
+                       res = -EIO;
+                       goto out;
+               }
+               res = squashfs_bio_read(sb, index, length, &bh, &nr_buffers,
+                                       &offset);
+               if (res)
+                       goto out;
 
-               bh[0] = get_block_length(sb, &cur_index, &offset, &length);
-               if (bh[0] == NULL)
-                       goto read_failure;
-               b = 1;
+               /* Extract the length of the metadata block */
+               length = (u8) bh[0]->b_data[offset];
+               length |= offset == msblk->devblksize - 1
+                       ? (u8) bh[1]->b_data[0] << 8
+                       : (u8) bh[0]->b_data[offset + 1] << 8;
 
-               bytes = msblk->devblksize - offset;
                compressed = SQUASHFS_COMPRESSED(length);
                length = SQUASHFS_COMPRESSED_SIZE(length);
-               if (next_index)
-                       *next_index = index + length + 2;
 
-               TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
-                               compressed ? "" : "un", length);
-
-               if (length < 0 || length > output->length ||
-                                       (index + length) > msblk->bytes_used)
-                       goto block_release;
-
-               for (; bytes < length; b++) {
-                       bh[b] = sb_getblk(sb, ++cur_index);
-                       if (bh[b] == NULL)
-                               goto block_release;
-                       bytes += msblk->devblksize;
-               }
-               ll_rw_block(REQ_OP_READ, 0, b - 1, bh + 1);
+               free_bh_array(bh, nr_buffers);
+               bh = NULL;
+               index += 2;
+       } else {
+               /*
+                * Data block.
+                */
+               compressed = SQUASHFS_COMPRESSED_BLOCK(length);
+               length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
        }
+       if (next_index)
+               *next_index = index + length;
 
-       for (i = 0; i < b; i++) {
-               wait_on_buffer(bh[i]);
-               if (!buffer_uptodate(bh[i]))
-                       goto block_release;
-       }
+       res = squashfs_bio_read(sb, index, length, &bh, &nr_buffers, &offset);
+       if (res)
+               goto out;
 
        if (compressed) {
-               if (!msblk->stream)
-                       goto read_failure;
-               length = squashfs_decompress(msblk, bh, b, offset, length,
-                       output);
-               if (length < 0)
-                       goto read_failure;
-       } else {
-               /*
-                * Block is uncompressed.
-                */
-               int in, pg_offset = 0;
-               void *data = squashfs_first_page(output);
-
-               for (bytes = length; k < b; k++) {
-                       in = min(bytes, msblk->devblksize - offset);
-                       bytes -= in;
-                       while (in) {
-                               if (pg_offset == PAGE_SIZE) {
-                                       data = squashfs_next_page(output);
-                                       pg_offset = 0;
-                               }
-                               avail = min_t(int, in, PAGE_SIZE -
-                                               pg_offset);
-                               memcpy(data + pg_offset, bh[k]->b_data + offset,
-                                               avail);
-                               in -= avail;
-                               pg_offset += avail;
-                               offset += avail;
-                       }
-                       offset = 0;
-                       put_bh(bh[k]);
+               if (!msblk->stream) {
+                       res = -EIO;
+                       goto out;
                }
-               squashfs_finish_page(output);
+               /* Note that this calls put_bh() */
+               res = squashfs_decompress(msblk, bh, nr_buffers, offset, length,
+                                         output);
+               kfree(bh);
+               bh = NULL;
+       } else {
+               res = squashfs_bh_to_actor(bh, nr_buffers, output, offset,
+                                          length, msblk->devblksize);
        }
+out:
+       TRACE("compressed=%d index=%lld length=%d next_index=%lld result=%d\n",
+             compressed, index, length, next_index ? *next_index : -1, res);
 
-       kfree(bh);
-       return length;
-
-block_release:
-       for (; k < b; k++)
-               put_bh(bh[k]);
+       free_bh_array(bh, nr_buffers);
 
-read_failure:
-       ERROR("squashfs_read_data failed to read block 0x%llx\n",
-                                       (unsigned long long) index);
-       kfree(bh);
-       return -EIO;
+       if (res < 0)
+               ERROR("Failed to read block 0x%llx\n", index);
+       return res;
 }
-- 
2.23.0.866.gb869b98d4c-goog

Reply via email to