From: Yu Kuai <yuku...@huawei.com> Each bit is one byte and contains one of 6 different states, and there are a total of 8 different actions that can change the state; see details in the following table:
| | Startwrite | Startsync | Endsync | Abortsync| Reload | Daemon | Discard | Stale | | --------- | ---------- | --------- | ------- | ------- | -------- | ------ | --------- | --------- | | Unwritten | Dirty | x | x | x | x | x | x | x | | Clean | Dirty | x | x | x | x | x | Unwritten | NeedSync | | Dirty | x | x | x | x | NeedSync | Clean | Unwritten | NeedSync | | NeedSync | x | Syncing | x | x | x | x | Unwritten | x | | Syncing | x | Syncing | Dirty | NeedSync | NeedSync | x | Unwritten | NeedSync | This patch implements the state machine first, and following patches will use it to implement the new llbitmap. Signed-off-by: Yu Kuai <yuku...@huawei.com> --- drivers/md/md-llbitmap.c | 256 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 drivers/md/md-llbitmap.c diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c new file mode 100644 index 000000000000..1f97b6868279 --- /dev/null +++ b/drivers/md/md-llbitmap.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/blkdev.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/timer.h> +#include <linux/sched.h> +#include <linux/list.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/buffer_head.h> +#include <linux/seq_file.h> +#include <trace/events/block.h> + +#include "md.h" +#include "md-bitmap.h" + +/* + * #### Background + * + * Redundant data is used to enhance data fault tolerance, and the storage + * method for redundant data varies depending on the RAID level. And it's + * important to maintain the consistency of redundant data. + * + * Bitmap is used to record which data blocks have been synchronized and which + * ones need to be resynchronized or recovered. Each bit in the bitmap + * represents a segment of data in the array. 
When a bit is set, it indicates + * that the multiple redundant copies of that data segment may not be + * consistent. Data synchronization can be performed based on the bitmap after + * power failure or readding a disk. If there is no bitmap, a full disk + * synchronization is required. + * + * #### Key Concept + * + * ##### State Machine + * + * Each bit is one byte and holds one of 6 different states, see llbitmap_state. And + * there are a total of 8 different actions, see llbitmap_action, that can change the state: + * + * llbitmap state machine: transitions between states + * + * | | Startwrite | Startsync | Endsync | Abortsync| Reload | Daemon | Discard | Stale | + * | --------- | ---------- | --------- | ------- | ------- | -------- | ------ | --------- | --------- | + * | Unwritten | Dirty | x | x | x | x | x | x | x | + * | Clean | Dirty | x | x | x | x | x | Unwritten | NeedSync | + * | Dirty | x | x | x | x | NeedSync | Clean | Unwritten | NeedSync | + * | NeedSync | x | Syncing | x | x | x | x | Unwritten | x | + * | Syncing | x | Syncing | Dirty | NeedSync | NeedSync | x | Unwritten | NeedSync | + * + * special illustration: + * - Unwritten is a special state, which means the user never wrote data, hence there + * is no need to resync/recover data. This is safe if the user creates a filesystem + * for the array; the filesystem will make sure the user gets zero data for + * unwritten blocks. + * - After resync is done, change state from Syncing to Dirty first, in case + * Startwrite happens before the state is Clean. 
+ */
+
+#define BITMAP_MAX_PAGES 32
+#define BITMAP_SB_SIZE 1024
+
+/*
+ * Per-bit state. Each bit of the bitmap is stored as one byte, see
+ * state_from_page() and state_to_page().
+ */
+enum llbitmap_state {
+	/* No valid data, init state after assemble the array */
+	BitUnwritten = 0,
+	/* data is consistent */
+	BitClean,
+	/* data will be consistent after IO is done, set directly for writes */
+	BitDirty,
+	/*
+	 * data need to be resynchronized:
+	 * 1) set directly for writes if array is degraded, prevent full disk
+	 * synchronization after readding a disk;
+	 * 2) reassemble the array after power failure, and dirty bits are
+	 * found after reloading the bitmap;
+	 */
+	BitNeedSync,
+	/* data is synchronizing */
+	BitSyncing,
+	/* number of real states, first dimension of state_machine[] */
+	nr_llbitmap_state,
+	/* not a real state: "no transition" marker used in state_machine[] */
+	BitNone = 0xff,
+};
+
+enum llbitmap_action {
+	/* User write new data, this is the only action from IO fast path */
+	BitmapActionStartwrite = 0,
+	/* Start recovery */
+	BitmapActionStartsync,
+	/* Finish recovery */
+	BitmapActionEndsync,
+	/* Failed recovery */
+	BitmapActionAbortsync,
+	/* Reassemble the array */
+	BitmapActionReload,
+	/* Daemon thread is trying to clear dirty bits */
+	BitmapActionDaemon,
+	/* Data is deleted */
+	BitmapActionDiscard,
+	/*
+	 * Bitmap is stale, mark all bits except BitUnwritten as
+	 * BitNeedSync.
+	 */
+	BitmapActionStale,
+	/* number of table-driven actions, second dimension of state_machine[] */
+	nr_llbitmap_action,
+	/* Init state is BitUnwritten */
+	BitmapActionInit,
+};
+
+struct llbitmap {
+	struct mddev *mddev;
+	/* hidden disk to manage bitmap IO */
+	struct gendisk *bitmap_disk;
+	/* opened hidden disk */
+	struct file *bitmap_file;
+	int nr_pages;
+	struct page *pages[BITMAP_MAX_PAGES];
+
+	struct bio_set bio_set;
+	struct bio_list retry_list;
+	struct work_struct retry_work;
+	spinlock_t retry_lock;
+
+	/* shift of one chunk */
+	unsigned long chunkshift;
+	/* size of one chunk in sector */
+	unsigned long chunksize;
+	/* total number of chunks */
+	unsigned long chunks;
+	/* fires on first BitDirty state */
+	struct timer_list pending_timer;
+	struct work_struct daemon_work;
+
+	unsigned long flags;
+	__u64 events_cleared;
+};
+
+/*
+ * Transition table, indexed as state_machine[current state][action]. The
+ * columns follow the order of enum llbitmap_action:
+ *   Startwrite, Startsync, Endsync, Abortsync, Reload, Daemon, Discard, Stale
+ * A BitNone entry means the action does not change that state.
+ *
+ * The element type is u8 rather than plain char: BitNone is 0xff, and with a
+ * signed char it would read back as -1 and never compare equal to BitNone.
+ */
+static const u8 state_machine[nr_llbitmap_state][nr_llbitmap_action] = {
+	[BitUnwritten] = {BitDirty, BitNone, BitNone, BitNone, BitNone, BitNone, BitNone, BitNone},
+	[BitClean] = {BitDirty, BitNone, BitNone, BitNone, BitNone, BitNone, BitUnwritten, BitNeedSync},
+	[BitDirty] = {BitNone, BitNone, BitNone, BitNone, BitNeedSync, BitClean, BitUnwritten, BitNeedSync},
+	[BitNeedSync] = {BitNone, BitSyncing, BitNone, BitNone, BitNone, BitNone, BitUnwritten, BitNone},
+	[BitSyncing] = {BitNone, BitSyncing, BitDirty, BitNeedSync, BitNeedSync, BitNone, BitUnwritten, BitNeedSync},
+};
+
+/* Return the state byte stored in @page at the in-page offset of @pos. */
+static enum llbitmap_state state_from_page(struct page *page, loff_t pos)
+{
+	u8 *p = kmap_local_page(page);
+	enum llbitmap_state state = p[offset_in_page(pos)];
+
+	kunmap_local(p);
+	return state;
+}
+
+/*
+ * Store @state as one byte in @page at the in-page offset of @pos and mark
+ * the page dirty.
+ */
+static void state_to_page(struct page *page, enum llbitmap_state state,
+			  loff_t pos)
+{
+	u8 *p = kmap_local_page(page);
+
+	p[offset_in_page(pos)] = state;
+	set_page_dirty(page);
+	kunmap_local(p);
+}
+
+/*
+ * Read the state of bit @pos into @state. State bytes start BITMAP_SB_SIZE
+ * bytes into the cached pages, so @pos is shifted by that amount before the
+ * page is looked up. Currently always returns 0.
+ */
+static int llbitmap_read(struct llbitmap *llbitmap, enum llbitmap_state *state,
+			 loff_t pos)
+{
+	pos += BITMAP_SB_SIZE;
+	*state = state_from_page(llbitmap->pages[pos >> PAGE_SHIFT], pos);
+	return 0;
+}
+
+static int llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state, + loff_t pos) +{ + pos += BITMAP_SB_SIZE; + state_to_page(llbitmap->pages[pos >> PAGE_SHIFT], state, pos); + return 0; +} + +/* The return value is only used from resync, where @start == @end. */ +static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap, + unsigned long start, + unsigned long end, + enum llbitmap_action action) +{ + struct mddev *mddev = llbitmap->mddev; + enum llbitmap_state state = BitNone; + bool need_recovery = false; + + if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) + return BitNone; + + while (start <= end) { + ssize_t ret; + enum llbitmap_state c; + + if (action == BitmapActionInit) { + state = BitUnwritten; + ret = llbitmap_write(llbitmap, state, start); + if (ret < 0) { + set_bit(BITMAP_WRITE_ERROR, &llbitmap->flags); + return BitNone; + } + + start++; + continue; + } + + ret = llbitmap_read(llbitmap, &c, start); + if (ret < 0) { + set_bit(BITMAP_WRITE_ERROR, &llbitmap->flags); + return BitNone; + } + + if (c < 0 || c >= nr_llbitmap_state) { + pr_err("%s: invalid bit %lu state %d action %d, forcing resync\n", + __func__, start, c, action); + c = BitNeedSync; + goto write_bitmap; + } + + if (c == BitNeedSync) + need_recovery = true; + + state = state_machine[c][action]; + if (state == BitNone) { + start++; + continue; + } + +write_bitmap: + ret = llbitmap_write(llbitmap, state, start); + if (ret < 0) { + set_bit(BITMAP_WRITE_ERROR, &llbitmap->flags); + return BitNone; + } + + if (state == BitNeedSync) + need_recovery = true; + else if (state == BitDirty && + !timer_pending(&llbitmap->pending_timer)) + mod_timer(&llbitmap->pending_timer, + jiffies + mddev->bitmap_info.daemon_sleep * HZ); + + start++; + } + + if (need_recovery) { + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + set_bit(MD_RECOVERY_SYNC, &mddev->recovery); + md_wakeup_thread(mddev->thread); + } + + return state; +} -- 2.39.2