From: Yu Kuai <yuku...@huawei.com>

IO fast path will set bits to dirty, and those dirty bits must be cleared
after IO is done, to prevent unnecessary data recovery after power failure.
This patch adds a bitmap page level barrier and related APIs,
- llbitmap_{suspend, resume} will be used by daemon from slow path:
  1) suspend new write IO;
  2) wait for inflight write IO to be done;
  3) clear dirty bits;
  4) resume write IO;
- llbitmap_{raise, release}_barrier will be used in IO fast path, the
  overhead is just one percpu ref get if the page is not suspended.

Signed-off-by: Yu Kuai <yuku...@huawei.com>
---
 drivers/md/md-llbitmap.c | 119 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index bbd8a7c99577..7d4a0e81f8e1 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -63,12 +63,29 @@
  * llbitmap_add_disk(). And a file is created as well to manage bitmap IO for
  * this disk, see details in llbitmap_open_disk(). Read/write bitmap is
  * converted to buffer IO to this file.
+ *
+ * IO fast path will set bits to dirty, and those dirty bits will be cleared
+ * by daemon after IO is done. llbitmap_barrier is used to synchronize between
+ * IO path and daemon;
+ *
+ * IO path:
+ * 1) try to grab a reference; on success, set expire time after 5s and return;
+ * 2) wait for daemon to finish clearing dirty bits;
+ *
+ * Daemon (woken up every daemon_sleep seconds):
+ * For each page:
+ * 1) check if page expired, if not skip this page; for expired page:
+ * 2) suspend the page and wait for inflight write IO to be done;
+ * 3) change dirty page to clean;
+ * 4) resume the page;
  */
 
 #define BITMAP_MAX_SECTOR (128 * 2)
 #define BITMAP_MAX_PAGES 32
 #define BITMAP_SB_SIZE 1024
 
+#define BARRIER_IDLE 5
+
 enum llbitmap_state {
 	/* No valid data, init state after assemble the array */
 	BitUnwritten = 0,
@@ -115,6 +132,16 @@ enum llbitmap_action {
 	BitmapActionInit,
 };
 
+/*
+ * page level barrier to synchronize between dirty bit by write IO and clean bit
+ * by daemon.
+ */
+struct llbitmap_barrier {
+	struct percpu_ref active;
+	unsigned long expire;
+	wait_queue_head_t wait;
+} ____cacheline_aligned_in_smp;
+
 struct llbitmap {
 	struct mddev *mddev;
 	/* hidden disk to manage bitmap IO */
@@ -123,6 +150,7 @@ struct llbitmap {
 	struct file *bitmap_file;
 	int nr_pages;
 	struct page *pages[BITMAP_MAX_PAGES];
+	struct llbitmap_barrier barrier[BITMAP_MAX_PAGES];
 	struct bio_set bio_set;
 
 	struct bio_list retry_list;
@@ -492,3 +520,94 @@ static void llbitmap_close_disk(struct llbitmap *llbitmap)
 
 	fput(bitmap_file);
 }
+static void llbitmap_free_pages(struct llbitmap *llbitmap)
+{
+	int i;
+
+	for (i = 0; i < BITMAP_MAX_PAGES; i++) {
+		struct page *page = llbitmap->pages[i];
+
+		if (!page)
+			return;
+
+		llbitmap->pages[i] = NULL;
+		put_page(page);
+		percpu_ref_exit(&llbitmap->barrier[i].active);
+	}
+}
+
+static void llbitmap_raise_barrier(struct llbitmap *llbitmap, int page_idx)
+{
+	struct llbitmap_barrier *barrier = &llbitmap->barrier[page_idx];
+
+retry:
+	if (likely(percpu_ref_tryget_live(&barrier->active))) {
+		WRITE_ONCE(barrier->expire, jiffies + BARRIER_IDLE * HZ);
+		return;
+	}
+
+	wait_event(barrier->wait, !percpu_ref_is_dying(&barrier->active));
+	goto retry;
+}
+
+static void llbitmap_release_barrier(struct llbitmap *llbitmap, int page_idx)
+{
+	struct llbitmap_barrier *barrier = &llbitmap->barrier[page_idx];
+
+	percpu_ref_put(&barrier->active);
+}
+
+static void llbitmap_suspend(struct llbitmap *llbitmap, int page_idx)
+{
+	struct llbitmap_barrier *barrier = &llbitmap->barrier[page_idx];
+
+	percpu_ref_kill(&barrier->active);
+	wait_event(barrier->wait, percpu_ref_is_zero(&barrier->active));
+}
+
+static void llbitmap_resume(struct llbitmap *llbitmap, int page_idx)
+{
+	struct llbitmap_barrier *barrier = &llbitmap->barrier[page_idx];
+
+	WRITE_ONCE(barrier->expire, LONG_MAX);
+	percpu_ref_resurrect(&barrier->active);
+	wake_up(&barrier->wait);
+}
+
+static void active_release(struct percpu_ref *ref)
+{
+	struct llbitmap_barrier *barrier =
+		container_of(ref, struct llbitmap_barrier, active);
+
+	wake_up(&barrier->wait);
+}
+
+static int llbitmap_cache_pages(struct llbitmap *llbitmap)
+{
+	int nr_pages = (llbitmap->chunks + BITMAP_SB_SIZE + PAGE_SIZE - 1) / PAGE_SIZE;
+	struct page *page;
+	int i = 0;
+
+	llbitmap->nr_pages = nr_pages;
+	while (i < nr_pages) {
+		page = read_mapping_page(llbitmap->bitmap_file->f_mapping, i, NULL);
+		if (IS_ERR(page)) {
+			int ret = PTR_ERR(page);
+
+			llbitmap_free_pages(llbitmap);
+			return ret;
+		}
+
+		if (percpu_ref_init(&llbitmap->barrier[i].active, active_release,
+				    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
+			put_page(page);
+			llbitmap_free_pages(llbitmap);
+			return -ENOMEM;
+		}
+		init_waitqueue_head(&llbitmap->barrier[i].wait);
+		llbitmap->pages[i++] = page;
+	}
+
+	return 0;
+}
+
-- 
2.39.2