The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.30
------>
commit 56173658cb8730f5cf0dc37d691ece8d4e8f8ab3
Author: Kirill Tkhai <ktk...@virtuozzo.com>
Date: Mon May 17 19:04:55 2021 +0300
ploop: Convert COW Make COW proceed via pio Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> ===================== Patchset description: dm-ploop: Kill loop Intermediate patches can't be base for bisect. In scope of https://jira.sw.ru/browse/PSBM-123654 Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> --- drivers/md/dm-ploop-cmd.c | 54 ++++++------- drivers/md/dm-ploop-map.c | 180 +++++++++++++++++++++---------------------- drivers/md/dm-ploop-target.c | 2 +- drivers/md/dm-ploop.h | 22 ++---- 4 files changed, 124 insertions(+), 134 deletions(-) diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c index fc106204e654..ecea3040b1cb 100644 --- a/drivers/md/dm-ploop-cmd.c +++ b/drivers/md/dm-ploop-cmd.c @@ -197,20 +197,20 @@ static unsigned int ploop_find_bat_entry(struct ploop *ploop, return cluster; } -void bio_prepare_offsets(struct ploop *ploop, struct bio *bio, +void pio_prepare_offsets(struct ploop *ploop, struct pio *pio, unsigned int cluster) { unsigned int cluster_log = ploop->cluster_log; int i, nr_pages = nr_pages_in_cluster(ploop); - bio->bi_vcnt = nr_pages; + pio->bi_vcnt = nr_pages; for (i = 0; i < nr_pages; i++) { - bio->bi_io_vec[i].bv_offset = 0; - bio->bi_io_vec[i].bv_len = PAGE_SIZE; + pio->bi_io_vec[i].bv_offset = 0; + pio->bi_io_vec[i].bv_len = PAGE_SIZE; } - bio->bi_iter.bi_sector = cluster << cluster_log; - bio->bi_iter.bi_size = 1 << (cluster_log + 9); + pio->bi_iter.bi_sector = cluster << cluster_log; + pio->bi_iter.bi_size = 1 << (cluster_log + 9); } static int rw_pages_sync(int rw, struct file *file, u64 page_id, void *data) @@ -445,41 +445,41 @@ static void process_resize_cmd(struct ploop *ploop, struct ploop_index_wb *piwb, complete(&cmd->comp); /* Last touch of cmd memory */ } -struct bio *alloc_bio_with_pages(struct ploop *ploop) +struct pio *alloc_pio_with_pages(struct ploop *ploop) { unsigned int cluster_log = ploop->cluster_log; int i, nr_pages = nr_pages_in_cluster(ploop); - struct bio *bio; + struct pio *pio; + u32 
size; - if (nr_pages <= BIO_MAX_PAGES) - bio = bio_alloc(GFP_NOIO, nr_pages); - else - bio = bio_kmalloc(GFP_NOIO, nr_pages); - if (!bio) + size = sizeof(*pio) + sizeof(*pio->bi_io_vec) * nr_pages; + pio = kmalloc(size, GFP_NOIO); + if (!pio) return NULL; + pio->bi_io_vec = (void *)(pio + 1); for (i = 0; i < nr_pages; i++) { - bio->bi_io_vec[i].bv_page = alloc_page(GFP_NOIO); - if (!bio->bi_io_vec[i].bv_page) + pio->bi_io_vec[i].bv_page = alloc_page(GFP_NOIO); + if (!pio->bi_io_vec[i].bv_page) goto err; - bio->bi_io_vec[i].bv_offset = 0; - bio->bi_io_vec[i].bv_len = PAGE_SIZE; + pio->bi_io_vec[i].bv_offset = 0; + pio->bi_io_vec[i].bv_len = PAGE_SIZE; } - bio->bi_vcnt = nr_pages; - bio->bi_iter.bi_size = 1 << (cluster_log + 9); + pio->bi_vcnt = nr_pages; + pio->bi_iter.bi_size = 1 << (cluster_log + 9); - return bio; + return pio; err: while (i-- > 0) - put_page(bio->bi_io_vec[i].bv_page); - bio_put(bio); + put_page(pio->bi_io_vec[i].bv_page); + kfree(pio); return NULL; } -void free_bio_with_pages(struct ploop *ploop, struct bio *bio) +void free_pio_with_pages(struct ploop *ploop, struct pio *pio) { - int i, nr_pages = bio->bi_vcnt; + int i, nr_pages = pio->bi_vcnt; struct page *page; /* @@ -489,11 +489,11 @@ void free_bio_with_pages(struct ploop *ploop, struct bio *bio) WARN_ON_ONCE(nr_pages != nr_pages_in_cluster(ploop)); for (i = 0; i < nr_pages; i++) { - page = bio->bi_io_vec[i].bv_page; + page = pio->bi_io_vec[i].bv_page; put_page(page); } - bio_put(bio); + kfree(pio); } /* @new_size is in sectors */ @@ -1516,7 +1516,7 @@ static bool ploop_has_pending_activity(struct ploop *ploop) has = ploop->deferred_cmd; has |= !list_empty(&ploop->deferred_pios); has |= !list_empty(&ploop->discard_pios); - has |= !bio_list_empty(&ploop->delta_cow_action_list); + has |= !list_empty(&ploop->delta_cow_action_list); spin_unlock_irq(&ploop->deferred_lock); return has; diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c index 46cc98c945e8..5c2b4e32e525 100644 --- 
a/drivers/md/dm-ploop-map.c +++ b/drivers/md/dm-ploop-map.c @@ -74,7 +74,7 @@ static void ploop_index_wb_init(struct ploop_index_wb *piwb, struct ploop *ploop piwb->bat_page = NULL; piwb->bi_status = 0; INIT_LIST_HEAD(&piwb->ready_data_pios); - bio_list_init(&piwb->cow_list); + INIT_LIST_HEAD(&piwb->cow_list); /* For ploop_bat_write_complete() */ atomic_set(&piwb->count, 1); piwb->completed = false; @@ -522,12 +522,12 @@ static int ploop_discard_index_pio_end(struct ploop *ploop, struct pio *pio) static void complete_cow(struct ploop_cow *cow, blk_status_t bi_status) { unsigned int dst_cluster = cow->dst_cluster; - struct bio *cluster_bio = cow->cluster_bio; + struct pio *cluster_pio = cow->cluster_pio; struct ploop *ploop = cow->ploop; unsigned long flags; struct pio *h; - WARN_ON_ONCE(cluster_bio->bi_next); + WARN_ON_ONCE(!list_empty(&cluster_pio->list)); h = &cow->hook; del_cluster_lk(ploop, h); @@ -542,7 +542,7 @@ static void complete_cow(struct ploop_cow *cow, blk_status_t bi_status) cow->end_fn(ploop, blk_status_to_errno(bi_status), cow->data); queue_work(ploop->wq, &ploop->worker); - free_bio_with_pages(ploop, cow->cluster_bio); + free_pio_with_pages(ploop, cow->cluster_pio); kmem_cache_free(cow_cache, cow); } @@ -668,7 +668,7 @@ static void ploop_bat_write_complete(struct ploop_index_wb *piwb, blk_status_t bi_status) { struct ploop *ploop = piwb->ploop; - struct bio *cluster_bio; + struct pio *cluster_pio; struct ploop_cow *cow; struct pio *data_pio; unsigned long flags; @@ -700,8 +700,8 @@ static void ploop_bat_write_complete(struct ploop_index_wb *piwb, pio_endio(data_pio); } - while ((cluster_bio = bio_list_pop(&piwb->cow_list))) { - cow = cluster_bio->bi_private; + while ((cluster_pio = pio_list_pop(&piwb->cow_list))) { + cow = cluster_pio->endio_cb_data; complete_cow(cow, bi_status); } @@ -883,33 +883,62 @@ static bool ploop_attach_end_action(struct pio *h, struct ploop_index_wb *piwb) static void ploop_read_aio_do_completion(struct ploop_iocb *piocb) 
{ - struct bio *bio = piocb->bio; + struct pio *pio = piocb->pio; if (!atomic_dec_and_test(&piocb->count)) return; - bio_endio(bio); + pio_endio(pio); kmem_cache_free(piocb_cache, piocb); } static void ploop_read_aio_complete(struct kiocb *iocb, long ret, long ret2) { struct ploop_iocb *piocb = container_of(iocb, struct ploop_iocb, iocb); - struct bio *bio = piocb->bio; + struct pio *pio = piocb->pio; - if (ret != bio->bi_iter.bi_size) - bio->bi_status = BLK_STS_IOERR; - else - bio->bi_status = BLK_STS_OK; + if (ret != pio->bi_iter.bi_size) + pio->bi_status = BLK_STS_IOERR; ploop_read_aio_do_completion(piocb); } + +static void data_rw_complete(struct pio *pio) +{ + if (pio->ret != pio->bi_iter.bi_size) + pio->bi_status = BLK_STS_IOERR; + + pio_endio(pio); +} + +static void submit_rw_mapped(struct ploop *ploop, u32 dst_clu, struct pio *pio) +{ + unsigned int rw, nr_segs; + struct bio_vec *bvec; + struct iov_iter iter; + loff_t pos; + + pio->complete = data_rw_complete; + + rw = (op_is_write(pio->bi_opf) ? WRITE : READ); + nr_segs = pio_nr_segs(pio); + bvec = __bvec_iter_bvec(pio->bi_io_vec, pio->bi_iter); + + iov_iter_bvec(&iter, rw, bvec, nr_segs, pio->bi_iter.bi_size); + iter.iov_offset = pio->bi_iter.bi_bvec_done; + + remap_to_cluster(ploop, pio, dst_clu); + pos = to_bytes(pio->bi_iter.bi_sector); + + call_rw_iter(top_delta(ploop)->file, pos, rw, &iter, pio); +} + /* * Read cluster or its part from secondary delta. - * @bio is dm's or plain (w/o pio container and ploop_endio()). + * @pio is dm's or plain (w/o bio container and ploop_endio()). * Note, that nr inflight is not incremented here, so delegate this to caller * (if you need). 
*/ static void submit_delta_read(struct ploop *ploop, unsigned int level, - unsigned int dst_cluster, struct bio *bio) + unsigned int dst_cluster, struct pio *pio) { unsigned int flags, offset; struct ploop_iocb *piocb; @@ -921,22 +950,22 @@ static void submit_delta_read(struct ploop *ploop, unsigned int level, piocb = kmem_cache_zalloc(piocb_cache, GFP_NOIO); if (!piocb) { - bio->bi_status = BLK_STS_RESOURCE; - bio_endio(bio); + pio->bi_status = BLK_STS_RESOURCE; + pio_endio(pio); return; } atomic_set(&piocb->count, 2); - piocb->bio = bio; + piocb->pio = pio; - remap_to_cluster_bio(ploop, bio, dst_cluster); + remap_to_cluster(ploop, pio, dst_cluster); - bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); - offset = bio->bi_iter.bi_bvec_done; + bvec = __bvec_iter_bvec(pio->bi_io_vec, pio->bi_iter); + offset = pio->bi_iter.bi_bvec_done; - iov_iter_bvec(&iter, READ, bvec, 1, bio->bi_iter.bi_size); + iov_iter_bvec(&iter, READ, bvec, 1, pio->bi_iter.bi_size); iter.iov_offset = offset; - pos = (bio->bi_iter.bi_sector << SECTOR_SHIFT); + pos = (pio->bi_iter.bi_sector << SECTOR_SHIFT); file = ploop->deltas[level].file; piocb->iocb.ki_pos = pos; @@ -958,29 +987,27 @@ static void submit_delta_read(struct ploop *ploop, unsigned int level, static void initiate_delta_read(struct ploop *ploop, unsigned int level, unsigned int dst_cluster, struct pio *pio) { - struct bio *bio = dm_bio_from_per_bio_data(pio, sizeof(*pio)); - if (dst_cluster == BAT_ENTRY_NONE) { + struct bio *bio = dm_bio_from_per_bio_data(pio, sizeof(*pio)); /* No one delta contains dst_cluster. 
*/ zero_fill_bio(bio); pio_endio(pio); return; } - submit_delta_read(ploop, level, dst_cluster, bio); + submit_delta_read(ploop, level, dst_cluster, pio); } -static void ploop_cow_endio(struct bio *cluster_bio) +static void ploop_cow_endio(struct pio *cluster_pio, void *data, blk_status_t bi_status) { - struct ploop_cow *cow = cluster_bio->bi_private; + struct ploop_cow *cow = data; struct ploop *ploop = cow->ploop; - unsigned int dst_cluster = cluster_bio->bi_iter.bi_sector >> ploop->cluster_log; unsigned long flags; - track_dst_cluster(ploop, dst_cluster); + track_pio(ploop, cluster_pio); spin_lock_irqsave(&ploop->deferred_lock, flags); - bio_list_add(&ploop->delta_cow_action_list, cluster_bio); + list_add_tail(&cluster_pio->list, &ploop->delta_cow_action_list); spin_unlock_irqrestore(&ploop->deferred_lock, flags); dec_nr_inflight_raw(ploop, &cow->hook); @@ -1050,13 +1077,14 @@ int submit_cluster_cow(struct ploop *ploop, unsigned int level, unsigned int cluster, unsigned int dst_cluster, void (*end_fn)(struct ploop *, int, void *), void *data) { - struct bio *bio = NULL; + struct pio *pio = NULL; struct ploop_cow *cow; /* Prepare new delta read */ - bio = alloc_bio_with_pages(ploop); - if (!bio) + pio = alloc_pio_with_pages(ploop); + if (!pio) goto err; + ploop_init_end_io(ploop, pio); cow = kmem_cache_alloc(cow_cache, GFP_NOIO); if (!cow) @@ -1064,24 +1092,24 @@ int submit_cluster_cow(struct ploop *ploop, unsigned int level, cow->ploop = ploop; cow->dst_cluster = BAT_ENTRY_NONE; - cow->cluster_bio = bio; + cow->cluster_pio = pio; cow->end_fn = end_fn; cow->data = data; - bio_prepare_offsets(ploop, bio, cluster); - bio_set_op_attrs(bio, REQ_OP_READ, 0); - bio->bi_end_io = ploop_cow_endio; - bio->bi_private = cow; + pio_prepare_offsets(ploop, pio, cluster); + pio->bi_opf = REQ_OP_READ; + pio->endio_cb = ploop_cow_endio; + pio->endio_cb_data = cow; ploop_init_end_io(ploop, &cow->hook); add_cluster_lk(ploop, &cow->hook, cluster); /* Stage #0: read secondary delta 
full cluster */ - submit_delta_read(ploop, level, dst_cluster, bio); + submit_delta_read(ploop, level, dst_cluster, pio); return 0; err: - if (bio) - free_bio_with_pages(ploop, bio); + if (pio) + free_pio_with_pages(ploop, pio); return -ENOMEM; } @@ -1111,7 +1139,7 @@ static void initiate_cluster_cow(struct ploop *ploop, unsigned int level, static void submit_cluster_write(struct ploop_cow *cow) { - struct bio *bio = cow->cluster_bio; + struct pio *pio = cow->cluster_pio; struct ploop *ploop = cow->ploop; unsigned int dst_cluster; @@ -1119,19 +1147,17 @@ static void submit_cluster_write(struct ploop_cow *cow) goto error; cow->dst_cluster = dst_cluster; - bio_reset(bio); - bio_prepare_offsets(ploop, bio, dst_cluster); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - remap_to_origin(ploop, bio); + pio_prepare_offsets(ploop, pio, dst_cluster); + pio->bi_opf = REQ_OP_WRITE; BUG_ON(irqs_disabled()); read_lock_irq(&ploop->bat_rwlock); inc_nr_inflight_raw(ploop, &cow->hook); read_unlock_irq(&ploop->bat_rwlock); - bio->bi_end_io = ploop_cow_endio; - bio->bi_private = cow; + pio->endio_cb = ploop_cow_endio; + pio->endio_cb_data = cow; - submit_bio(bio); + submit_rw_mapped(ploop, dst_cluster, pio); return; error: complete_cow(cow, BLK_STS_IOERR); @@ -1157,7 +1183,8 @@ static void submit_cow_index_wb(struct ploop_cow *cow, if (piwb->page_nr != page_nr || piwb->type != PIWB_TYPE_ALLOC) { /* Another BAT page wb is in process */ spin_lock_irq(&ploop->deferred_lock); - bio_list_add(&ploop->delta_cow_action_list, cow->cluster_bio); + list_add_tail(&cow->cluster_pio->list, + &ploop->delta_cow_action_list); spin_unlock_irq(&ploop->deferred_lock); queue_work(ploop->wq, &ploop->worker); goto out; @@ -1173,7 +1200,7 @@ static void submit_cow_index_wb(struct ploop_cow *cow, /* Prevent double clearing of holes_bitmap bit on complete_cow() */ cow->dst_cluster = BAT_ENTRY_NONE; spin_lock_irq(&ploop->deferred_lock); - bio_list_add(&piwb->cow_list, cow->cluster_bio); + 
list_add_tail(&cow->cluster_pio->list, &piwb->cow_list); spin_unlock_irq(&ploop->deferred_lock); out: return; @@ -1183,20 +1210,19 @@ static void submit_cow_index_wb(struct ploop_cow *cow, static void process_delta_wb(struct ploop *ploop, struct ploop_index_wb *piwb) { - struct bio_list cow_list = BIO_EMPTY_LIST; - struct bio *cluster_bio; + struct pio *cluster_pio; struct ploop_cow *cow; + LIST_HEAD(cow_list); - if (bio_list_empty(&ploop->delta_cow_action_list)) + if (list_empty(&ploop->delta_cow_action_list)) return; - bio_list_merge(&cow_list, &ploop->delta_cow_action_list); - bio_list_init(&ploop->delta_cow_action_list); + list_splice_tail_init(&ploop->delta_cow_action_list, &cow_list); spin_unlock_irq(&ploop->deferred_lock); - while ((cluster_bio = bio_list_pop(&cow_list)) != NULL) { - cow = cluster_bio->bi_private; - if (unlikely(cluster_bio->bi_status != BLK_STS_OK)) { - complete_cow(cow, cluster_bio->bi_status); + while ((cluster_pio = pio_list_pop(&cow_list)) != NULL) { + cow = cluster_pio->endio_cb_data; + if (unlikely(cluster_pio->bi_status != BLK_STS_OK)) { + complete_cow(cow, cluster_pio->bi_status); continue; } @@ -1283,36 +1309,6 @@ static bool locate_new_cluster_and_attach_pio(struct ploop *ploop, return false; } -static void data_rw_complete(struct pio *pio) -{ - if (pio->ret != pio->bi_iter.bi_size) - pio->bi_status = BLK_STS_IOERR; - - pio_endio(pio); -} - -static void submit_rw_mapped(struct ploop *ploop, loff_t clu_pos, struct pio *pio) -{ - unsigned int rw, nr_segs; - struct bio_vec *bvec; - struct iov_iter iter; - loff_t pos; - - pio->complete = data_rw_complete; - - rw = (op_is_write(pio->bi_opf) ? 
WRITE : READ); - nr_segs = pio_nr_segs(pio); - bvec = __bvec_iter_bvec(pio->bi_io_vec, pio->bi_iter); - - iov_iter_bvec(&iter, rw, bvec, nr_segs, pio->bi_iter.bi_size); - iter.iov_offset = pio->bi_iter.bi_bvec_done; - - remap_to_cluster(ploop, pio, clu_pos); - pos = to_bytes(pio->bi_iter.bi_sector); - - call_rw_iter(top_delta(ploop)->file, pos, rw, &iter, pio); -} - static int process_one_deferred_bio(struct ploop *ploop, struct pio *pio, struct ploop_index_wb *piwb) { diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c index 14bd37610b25..bac1d74c4dcf 100644 --- a/drivers/md/dm-ploop-target.c +++ b/drivers/md/dm-ploop-target.c @@ -312,7 +312,7 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_LIST_HEAD(&ploop->flush_pios); INIT_LIST_HEAD(&ploop->discard_pios); INIT_LIST_HEAD(&ploop->cluster_lk_list); - bio_list_init(&ploop->delta_cow_action_list); + INIT_LIST_HEAD(&ploop->delta_cow_action_list); atomic_set(&ploop->nr_discard_bios, 0); ploop->bat_entries = RB_ROOT; ploop->exclusive_bios_rbtree = RB_ROOT; diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h index dad0e5857a50..550550dfb435 100644 --- a/drivers/md/dm-ploop.h +++ b/drivers/md/dm-ploop.h @@ -120,7 +120,7 @@ struct ploop_index_wb { spinlock_t lock; struct page *bat_page; struct list_head ready_data_pios; - struct bio_list cow_list; + struct list_head cow_list; atomic_t count; bool completed; int bi_status; @@ -216,7 +216,7 @@ struct ploop { struct list_head cluster_lk_list; /* List of COW requests requiring action. 
*/ - struct bio_list delta_cow_action_list; + struct list_head delta_cow_action_list; /* Resume is prohibited */ bool noresume; @@ -242,6 +242,7 @@ struct pio { struct bvec_iter bi_iter; struct bio_vec *bi_io_vec; unsigned int bi_opf; + unsigned int bi_vcnt; blk_status_t bi_status; ploop_endio_t endio_cb; @@ -272,14 +273,14 @@ struct pio { struct ploop_iocb { struct kiocb iocb; - struct bio *bio; + struct pio *pio; atomic_t count; }; /* Delta COW private */ struct ploop_cow { struct ploop *ploop; - struct bio *cluster_bio; + struct pio *cluster_pio; unsigned int dst_cluster; struct pio hook; @@ -321,13 +322,6 @@ static inline void remap_to_cluster(struct ploop *ploop, struct pio *pio, pio->bi_iter.bi_sector |= (cluster << ploop->cluster_log); } -static inline void remap_to_cluster_bio(struct ploop *ploop, struct bio *bio, - unsigned int cluster) -{ - bio->bi_iter.bi_sector &= ((1 << ploop->cluster_log) - 1); - bio->bi_iter.bi_sector |= (cluster << ploop->cluster_log); -} - static inline bool whole_cluster(struct ploop *ploop, struct pio *pio) { sector_t end_sector = bvec_iter_end_sector(pio->bi_iter); @@ -552,9 +546,9 @@ extern int submit_cluster_cow(struct ploop *ploop, unsigned int level, unsigned int cluster, unsigned int dst_cluster, void (*end_fn)(struct ploop *, int, void *), void *data); -extern struct bio * alloc_bio_with_pages(struct ploop *ploop); -extern void free_bio_with_pages(struct ploop *ploop, struct bio *bio); -extern void bio_prepare_offsets(struct ploop *, struct bio *, unsigned int); +extern struct pio * alloc_pio_with_pages(struct ploop *ploop); +extern void free_pio_with_pages(struct ploop *ploop, struct pio *pio); +extern void pio_prepare_offsets(struct ploop *, struct pio *, unsigned int); extern void ploop_free_pb(struct push_backup *pb); extern void cleanup_backup(struct ploop *ploop); _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel