Signed-off-by: Abhi Das <a...@redhat.com>
---
fs/gfs2/incore.h | 3 +
fs/gfs2/lops.c | 359 +++++++++++++++++++++++++++++++++++++++++++++++++++
fs/gfs2/lops.h | 1 +
fs/gfs2/ops_fstype.c | 2 +
fs/gfs2/recovery.c | 116 ++---------------
fs/gfs2/sys.c | 27 ++--
6 files changed, 391 insertions(+), 117 deletions(-)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b96d39c..424687f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -542,6 +542,8 @@ struct gfs2_jdesc {
int jd_recover_error;
/* Replay stuff */
+ struct gfs2_log_header_host jd_jhead;
+ struct mutex jd_jh_mutex;
unsigned int jd_found_blocks;
unsigned int jd_found_revokes;
unsigned int jd_replayed_blocks;
@@ -610,6 +612,7 @@ struct gfs2_tune {
unsigned int gt_complain_secs;
unsigned int gt_statfs_quantum;
unsigned int gt_statfs_slow;
+ unsigned int gt_bio_pool_size; /* No of bios to use for the bio_pool */
};
enum {
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index f2567f9..69fc058 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/list_sort.h>
+#include "bmap.h"
#include "dir.h"
#include "gfs2.h"
#include "incore.h"
@@ -370,6 +371,364 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
gfs2_log_bmap(sdp));
}
+/*
+ * The bio_pool structure is an array of bios of length 'size'.
+ * 'cur' is the index of the next bio to be submitted for I/O.
+ * 'wait' is the index of bio we need to wait on for I/O completion.
+ * 'inflight' is the number of bios submitted, but not yet completed.
+ */
+struct bio_pool {
+ struct bio **bios;
+ unsigned int size;
+ unsigned int cur;
+ unsigned int wait;
+ unsigned int inflight;
+};
+typedef int (search_bio_t) (struct gfs2_jdesc *jd, const void *ptr);
+
+/**
+ * bio_pool_submit_bio - Submit the current bio in the pool
+ *
+ * @pool: The bio pool
+ *
+ * Submit the current bio (pool->bios[pool->cur]) and update internal pool
+ * management variables. If pool->inflight == pool->size, we've maxed out all
+ * the bios in our pool and the caller needs to wait on some bios, process and
+ * free them so new ones can be added.
+ *
+ * Returns: 1 if we maxed out our bios, 0, otherwise
+ */
+
+static int bio_pool_submit_bio(struct bio_pool *pool)
+{
+ int ret = 0;
+ BUG_ON(!pool || !pool->bios || !pool->bios[pool->cur]);
+
+ bio_set_op_attrs(pool->bios[pool->cur], REQ_OP_READ, 0);
+ submit_bio(pool->bios[pool->cur]);
+ pool->cur = pool->cur == pool->size - 1 ? 0 : pool->cur + 1;
+ pool->inflight++;
+ if (pool->inflight == pool->size)
+ ret = 1;
+ return ret;
+}
+
+/**
+ * bio_pool_get_cur - Do what's necessary to get a valid bio for the caller.
+ *
+ * @pool: The bio pool
+ * @sdp: The gfs2 superblock
+ * @blkno: The block number we wish to add to a bio
+ * @end_io: The end_io completion callback
+ *
+ * If there's no currently active bio, we allocate one for the blkno and return.
+ *
+ * If there's an active bio at pool->bios[pool->cur], we check if the requested
+ * block may be tacked onto it. If yes, we do nothing and return.
+ *
+ * If the block can't be added (non-contiguous), we submit the current bio.
+ * pool->cur, pool->inflight will change and we fall through to allocate a new
+ * bio and return. In this case, it is possible that submitting the current bio
+ * has maxed out our readahead (bio_pool_submit_bio() returns 1). We pass this
+ * error code back to the caller.
+ *
+ * Returns: 1 if bio_pool_submit_bio() maxed readahead, else 0.
+ */
+
+static int bio_pool_get_cur(struct bio_pool *pool, struct gfs2_sbd *sdp,
+ u64 blkno, bio_end_io_t end_io, void *private)
+{
+ struct super_block *sb = sdp->sd_vfs;
+ struct bio *bio;
+ int ret = 0;
+
+ BUG_ON(!pool || !pool->bios);
+
+ if (pool->bios[pool->cur]) {
+ u64 nblk;
+ nblk = bio_end_sector(pool->bios[pool->cur]);
+ nblk >>= sdp->sd_fsb2bb_shift;
+ if (blkno == nblk)
+ return 0;
+ ret = bio_pool_submit_bio(pool);
+ }
+ bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
+ bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
+ bio_set_dev(bio, sb->s_bdev);
+ bio->bi_end_io = end_io;
+ bio->bi_private = private;
+ pool->bios[pool->cur] = bio;
+
+ return ret;
+}
+
+/**
+ * gfs2_jhead_search - search a block for the journal head
+ *
+ * @jd: The journal descriptor
+ * @ptr: Pointer to the block data
+ *
+ * Among the valid log headers, we try to locate the journal head with the
+ * largest sequence number that is also monotonically increasing.
+ *
+ * Returns: 1, if found, 0 otherwise.
+ */
+
+int gfs2_jhead_search(struct gfs2_jdesc *jd, const void *ptr)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct gfs2_log_header_host uninitialized_var(lh);
+ int ret = 0;
+
+ if (!__get_log_header(sdp, ptr, 0, &lh)) {
+ if (lh.lh_sequence > jd->jd_jhead.lh_sequence)
+ jd->jd_jhead = lh;
+ else
+ ret = 1;
+ }
+ return ret;
+}
+
+/**
+ * gfs2_bio_process - search a bio
+ *
+ * @jd: The journal descriptor
+ * @bio: The bio to process
+ * @search: The search function
+ *
+ * For each page in the bio, call the 'search' function to look for the journal
+ * head. Note that the bio and its pages are cleaned up in this function, so
+ * the 'search' function ptr can be NULL and the result of this function would
+ * simply be a cleanup of the bio.
+ *
+ * Returns: 1 if jhead was found, 0 otherwise.
+ */
+
+int gfs2_bio_process(struct gfs2_jdesc *jd, struct bio *bio,
+ search_bio_t search)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct page *page;
+ struct bio_vec *bvec;
+ int i, found = 0;
+
+ if (bio->bi_status) {
+ fs_err(sdp, "Error %d reading from journal, jid=%u\n",
+ bio->bi_status, jd->jd_jid);
+ }
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ page = bvec->bv_page;
+ if (search && !found)
+ found = search(jd, page_address(page));
+ mempool_free(page, gfs2_page_pool);
+ }
+
+ bio_put(bio);
+ return found;
+}
+
+static void gfs2_bio_wait(struct bio *bio)
+{
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!READ_ONCE(bio->bi_private))
+ break;
+ io_schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+}
+
+static void gfs2_bio_end_io(struct bio *bio)
+{
+ struct task_struct *waiter = bio->bi_private;
+
+ WRITE_ONCE(bio->bi_private, NULL);
+ wake_up_process(waiter);
+}
+
+/**
+ * bio_pool_wait_process - wait on the next bio and process the completed bio.
+ *
+ * @pool: The bio pool
+ * @jd: The journal descriptor
+ * @search: The function to pass to gfs2_bio_process() to process the bio.
+ *
+ * Wait on the next bio indexed by pool->wait. Upon completion, call
+ * gfs2_bio_process() to process the bio. Update the internal pool management
+ * variables.
+ *
+ * Returns: 1 if gfs2_bio_process() found the jhead, 0 otherwise.
+ */
+
+static int bio_pool_wait_process(struct bio_pool *pool, struct gfs2_jdesc *jd,
+ search_bio_t search)
+{
+ int ret;
+ BUG_ON(!pool || !pool->bios || !pool->bios[pool->wait]);
+ gfs2_bio_wait(pool->bios[pool->wait]);
+ ret = gfs2_bio_process(jd, pool->bios[pool->wait], search);
+ pool->bios[pool->wait] = NULL;
+ pool->inflight--;
+ pool->wait = pool->wait == pool->size - 1 ? 0 : pool->wait + 1;
+ return ret;
+}
+
+/**
+ * bio_pool_process_page - Add a page to the pool and flush bios, wait
+ * for completion and process as necessary.
+ * @pool: The bio_pool
+ * @page: The page to be added
+ * @jd: The journal descriptor
+ * @blkno: The block corresponding to the page
+ *
+ * As a general rule, we wait on the next bio if we submitted enough bios such
+ * that pool.inflight == pool.size.
+ *
+ * 1. Figure out which bio in the pool is able to take the page. This may mean
+ * submitting a previous bio. This may also involve waiting on a bio if the
+ * number of inflight bios is maxed out.
+ * 2. Add the page to the current bio. On successful addition, we simply return
+ * unless this was the last page. If so, we submit this bio and wait for *all*
+ * the inflight bios to complete and look for the jhead in them. We do this
+ * because this function won't be called back again after the last page.
+ * If adding the page fails due to the current bio being full, step 3.
+ * 3. Submit the current bio and wait on the next bio if needed. Try to add the
+ * page again into a new bio.
+ *
+ * Returns: 0, if page was queued for search, 1, if found and errno otherwise
+ */
+
+static int bio_pool_process_page(struct bio_pool *pool, struct page *page,
+ struct gfs2_jdesc *jd, u64 blkno)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct super_block *sb = sdp->sd_vfs;
+ int ret, last = page->private;
+
+ BUG_ON(!pool || !pool->bios);
+
+try_again:
+ ret = bio_pool_get_cur(pool, sdp, blkno, gfs2_bio_end_io, current);
+ if (ret) { /* We had to submit current bio and maxed out, so we wait */
+ ret = bio_pool_wait_process(pool, jd, gfs2_jhead_search);
+ if (ret)
+ goto out;
+ }
+
+ ret = bio_add_page(pool->bios[pool->cur], page, sb->s_blocksize, 0);
+ if (ret > 0) { /* successfully added */
+ ret = 0;
+ goto out;
+ }
+ /* bio is full, need to submit it */
+ ret = bio_pool_submit_bio(pool);
+ if (ret) {
+ ret = bio_pool_wait_process(pool, jd, gfs2_jhead_search);
+ if (ret)
+ goto out;
+ }
+ goto try_again;
+
+out:
+ if (ret == 0 && last) { /* look for jhead in the last parts */
+ bio_pool_submit_bio(pool);
+ while (pool->inflight) {
+ ret = bio_pool_wait_process(pool, jd,
+ gfs2_jhead_search);
+ if (ret)
+ break;
+ };
+ }
+ return ret;
+}
+
+static int bio_pool_init(struct bio_pool *pool, unsigned int size)
+{
+ BUG_ON(!pool || size < 2 || size > 64);
+ pool->bios = kcalloc(size, sizeof(struct bio*), GFP_NOFS);
+ if (!pool->bios)
+ return -ENOMEM;
+
+ pool->size = size;
+ pool->cur = 0;
+ pool->wait = 0;
+ pool->inflight = 0;
+ return 0;
+}
+
+static void bio_pool_cleanup(struct bio_pool *pool, struct gfs2_jdesc *jd)
+{
+ int i;
+
+ for (i = 0; i < pool->size; i++) {
+ if (!pool->bios[i])
+ continue;
+ gfs2_bio_wait(pool->bios[i]);
+ gfs2_bio_process(jd, pool->bios[i], NULL);
+ }
+}
+
+static void bio_pool_uninit(struct bio_pool *pool, struct gfs2_jdesc *jd)
+{
+ if (!pool)
+ return;
+ if (pool->bios) {
+ bio_pool_cleanup(pool, jd);
+ kfree(pool->bios);
+ memset(pool, 0, sizeof(struct bio_pool));
+ }
+}
+
+/**
+ * gfs2_log_jh_lookup - Use a pool of bios to read in the journal and locate
+ * the journal head
+ * @jd: The journal descriptor
+ *
+ * Use the pool of bios for readahead. When enough bios are inflight (i.e
+ * submitted), we wait for the earliest submitted bio to complete before
+ * creating another. This way, we can get some readahead going as well as
+ * process the completed bios sequentially.
+ *
+ * We don't submit any more bios once we've found the head.
+ *
+ * Returns: 0 on success(jd->jd_jhead contains the journal head), errno
+ * otherwise
+ */
+
+int gfs2_log_jh_lookup(struct gfs2_jdesc *jd)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct gfs2_journal_extent *je;
+ int i, ret = 0;
+
+ struct bio_pool bpool;
+
+ memset(&jd->jd_jhead, 0, sizeof(struct gfs2_log_header_host));
+ ret = bio_pool_init(&bpool, gfs2_tune_get(sdp, gt_bio_pool_size));
+ if (ret)
+ return ret;
+
+ if (list_empty(&jd->extent_list))
+ gfs2_map_journal_extents(sdp, jd);
+
+ list_for_each_entry(je, &jd->extent_list, list) {
+ for (i = 0; i < je->blocks; i++) {
+ struct page *page;
+ page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+ page_private(page) = (list_is_last(&je->list, &jd->extent_list)
+ && i == (je->blocks - 1));
+ ret = bio_pool_process_page(&bpool, page, jd, je->dblock + i);
+ if (ret)
+ goto out;
+ }
+ }
+out:
+ bio_pool_uninit(&bpool, jd);
+ if (ret == 1) /* found */
+ return 0;
+ return ret;
+}
+
static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
u32 ld_length, u32 ld_data1)
{
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index e494939..10589fd 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -32,6 +32,7 @@ extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
+extern int gfs2_log_jh_lookup(struct gfs2_jdesc *jd);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c2469833b..362a9d4 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -61,6 +61,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_new_files_jdata = 0;
gt->gt_max_readahead = BIT(18);
gt->gt_complain_secs = 10;
+ gt->gt_bio_pool_size = 16;
}
static struct gfs2_sbd *init_sbd(struct super_block *sb)
@@ -579,6 +580,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
break;
}
+ mutex_init(&jd->jd_jh_mutex);
spin_lock(&sdp->sd_jindex_spin);
jd->jd_jid = sdp->sd_journals++;
list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 2dac430..fe267cf 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -149,6 +149,7 @@ int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
return 0;
}
+
/**
* get_log_header - read the log header for a given segment
* @jd: the journal
@@ -182,85 +183,11 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
}
/**
- * find_good_lh - find a good log header
- * @jd: the journal
- * @blk: the segment to start searching from
- * @lh: the log header to fill in
- * @forward: if true search forward in the log, else search backward
- *
- * Call get_log_header() to get a log header for a segment, but if the
- * segment is bad, either scan forward or backward until we find a good one.
- *
- * Returns: errno
- */
-
-static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
- struct gfs2_log_header_host *head)
-{
- unsigned int orig_blk = *blk;
- int error;
-
- for (;;) {
- error = get_log_header(jd, *blk, head);
- if (error <= 0)
- return error;
-
- if (++*blk == jd->jd_blocks)
- *blk = 0;
-
- if (*blk == orig_blk) {
- gfs2_consist_inode(GFS2_I(jd->jd_inode));
- return -EIO;
- }
- }
-}
-
-/**
- * jhead_scan - make sure we've found the head of the log
- * @jd: the journal
- * @head: this is filled in with the log descriptor of the head
- *
- * At this point, seg and lh should be either the head of the log or just
- * before. Scan forward until we find the head.
- *
- * Returns: errno
- */
-
-static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
-{
- unsigned int blk = head->lh_blkno;
- struct gfs2_log_header_host lh;
- int error;
-
- for (;;) {
- if (++blk == jd->jd_blocks)
- blk = 0;
-
- error = get_log_header(jd, blk, &lh);
- if (error < 0)
- return error;
- if (error == 1)
- continue;
-
- if (lh.lh_sequence == head->lh_sequence) {
- gfs2_consist_inode(GFS2_I(jd->jd_inode));
- return -EIO;
- }
- if (lh.lh_sequence < head->lh_sequence)
- break;
-
- *head = lh;
- }
-
- return 0;
-}
-
-/**
* gfs2_find_jhead - find the head of a log
* @jd: the journal
* @head: the log descriptor for the head of the log is returned here
*
- * Do a binary search of a journal and find the valid log entry with the
+ * Do a search of a journal and find the valid log entry with the
* highest sequence number. (i.e. the log head)
*
* Returns: errno
@@ -268,40 +195,15 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
- struct gfs2_log_header_host lh_1, lh_m;
- u32 blk_1, blk_2, blk_m;
- int error;
-
- blk_1 = 0;
- blk_2 = jd->jd_blocks - 1;
+ int ret;
- for (;;) {
- blk_m = (blk_1 + blk_2) / 2;
+ mutex_lock(&jd->jd_jh_mutex);
+ ret = gfs2_log_jh_lookup(jd);
+ if (ret == 0)
+ *head = jd->jd_jhead;
+ mutex_unlock(&jd->jd_jh_mutex);
- error = find_good_lh(jd, &blk_1, &lh_1);
- if (error)
- return error;
-
- error = find_good_lh(jd, &blk_m, &lh_m);
- if (error)
- return error;
-
- if (blk_1 == blk_m || blk_m == blk_2)
- break;
-
- if (lh_1.lh_sequence <= lh_m.lh_sequence)
- blk_1 = blk_m;
- else
- blk_2 = blk_m;
- }
-
- error = jhead_scan(jd, &lh_1);
- if (error)
- return error;
-
- *head = lh_1;
-
- return error;
+ return ret;
}
/**
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 1787d29..a8a9307 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -573,7 +573,8 @@ static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
}
static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
- int check_zero, const char *buf, size_t len)
+ int check_zero, int check_range, unsigned int low,
+ unsigned int high, const char *buf, size_t len)
{
struct gfs2_tune *gt = &sdp->sd_tune;
unsigned int x;
@@ -589,6 +590,9 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
if (check_zero && !x)
return -EINVAL;
+ if (check_range && (x < low || x > high))
+ return -EINVAL;
+
spin_lock(>->gt_spin);
*field = x;
spin_unlock(>->gt_spin);
@@ -605,20 +609,22 @@ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
 } \
 TUNE_ATTR_3(name, name##_show, store)
-#define TUNE_ATTR(name, check_zero) \
+#define TUNE_ATTR(name, check_zero, check_range, low, high) \
 static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
 { \
- return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
+ return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, check_range,\
+ low, high, buf, len); \
 } \
 TUNE_ATTR_2(name, name##_store)
-TUNE_ATTR(quota_warn_period, 0);
-TUNE_ATTR(quota_quantum, 0);
-TUNE_ATTR(max_readahead, 0);
-TUNE_ATTR(complain_secs, 0);
-TUNE_ATTR(statfs_slow, 0);
-TUNE_ATTR(new_files_jdata, 0);
-TUNE_ATTR(statfs_quantum, 1);
+TUNE_ATTR(quota_warn_period, 0, 0, 0, 0);
+TUNE_ATTR(quota_quantum, 0, 0, 0, 0);
+TUNE_ATTR(max_readahead, 0, 0, 0, 0);
+TUNE_ATTR(complain_secs, 0, 0, 0, 0);
+TUNE_ATTR(statfs_slow, 0, 0, 0, 0);
+TUNE_ATTR(new_files_jdata, 0, 0, 0, 0);
+TUNE_ATTR(statfs_quantum, 1, 0, 0, 0);
+TUNE_ATTR(bio_pool_size, 1, 1, 2, 64);
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
static struct attribute *tune_attrs[] = {
@@ -630,6 +636,7 @@ static struct attribute *tune_attrs[] = {
&tune_attr_statfs_quantum.attr,
&tune_attr_quota_scale.attr,
&tune_attr_new_files_jdata.attr,
+ &tune_attr_bio_pool_size.attr,
NULL,
};