This interface allows suspending IO for a while. It acts the same as dm's suspend: we delay each new pio and wait until every already submitted (in-flight) pio is completed. This approach avoids corner cases that are possible in the current code: after the device is suspended, no pio is left in an intermediate state.
Also, this will allow to implement ENOSPC delay in further. Signed-off-by: Kirill Tkhai <[email protected]> --- drivers/md/dm-ploop-cmd.c | 219 +++++++++++++++++------------------------- drivers/md/dm-ploop-map.c | 34 ++++--- drivers/md/dm-ploop-target.c | 1 drivers/md/dm-ploop.h | 21 +--- 4 files changed, 116 insertions(+), 159 deletions(-) diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c index 4837c875ceaa..392290bb42ea 100644 --- a/drivers/md/dm-ploop-cmd.c +++ b/drivers/md/dm-ploop-cmd.c @@ -93,7 +93,7 @@ static int ploop_inflight_bios_ref_switch(struct ploop *ploop, bool killable) unsigned int index = ploop->inflight_bios_ref_index; int ret; - WARN_ON_ONCE(!(current->flags & PF_WQ_WORKER)); + WARN_ON_ONCE(current->flags & PF_WQ_WORKER); if (ploop->inflight_ref_comp_pending) { /* Previous completion was interrupted */ @@ -106,9 +106,9 @@ static int ploop_inflight_bios_ref_switch(struct ploop *ploop, bool killable) init_completion(comp); - write_lock_irq(&ploop->bat_rwlock); + spin_lock_irq(&ploop->deferred_lock); ploop->inflight_bios_ref_index = !index; - write_unlock_irq(&ploop->bat_rwlock); + spin_unlock_irq(&ploop->deferred_lock); percpu_ref_kill(&ploop->inflight_bios_ref[index]); @@ -122,6 +122,29 @@ static int ploop_inflight_bios_ref_switch(struct ploop *ploop, bool killable) return 0; } +static int ploop_suspend_submitting_pios(struct ploop *ploop) +{ + spin_lock_irq(&ploop->deferred_lock); + WARN_ON_ONCE(ploop->stop_submitting_pios); + ploop->stop_submitting_pios = true; + spin_unlock_irq(&ploop->deferred_lock); + + return ploop_inflight_bios_ref_switch(ploop, true); +} + +static void ploop_resume_submitting_pios(struct ploop *ploop) +{ + LIST_HEAD(list); + + spin_lock_irq(&ploop->deferred_lock); + WARN_ON_ONCE(!ploop->stop_submitting_pios); + ploop->stop_submitting_pios = false; + list_splice_tail_init(&ploop->delayed_pios, &list); + spin_unlock_irq(&ploop->deferred_lock); + + submit_pios(ploop, &list); +} + /* Find existing BAT 
cluster pointing to dst_cluster */ static unsigned int ploop_find_bat_entry(struct ploop *ploop, unsigned int dst_cluster, @@ -262,12 +285,8 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop, if (cluster == UINT_MAX || is_locked) { /* dst_cluster in top delta is not occupied? */ if (!test_bit(dst_cluster, ploop->holes_bitmap) || is_locked) { - /* - * No. Maybe, it's under COW. Try again later. - * FIXME: implement a wait list-like thing for - * clusters under COW and queue commands there. - */ - schedule_timeout(HZ/10); + WARN_ON_ONCE(1); + ret = -EIO; goto out; } /* Cluster is free, occupy it. Skip relocaton */ @@ -275,11 +294,6 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop, goto not_occupied; } - /* Wait inflights, which may use @cluster */ - ret = ploop_inflight_bios_ref_switch(ploop, true); - if (ret < 0) - goto out; - /* Read full cluster sync */ ret = ploop_read_cluster_sync(ploop, pio, dst_cluster); if (ret < 0) @@ -556,10 +570,14 @@ static int ploop_resize(struct ploop *ploop, sector_t new_sectors) cmd.type = PLOOP_CMD_RESIZE; cmd.ploop = ploop; + ploop_suspend_submitting_pios(ploop); + /* FIXME: Avoid using work */ init_completion(&cmd.comp); ploop_queue_deferred_cmd(ploop, &cmd); wait_for_completion(&cmd.comp); + ploop_resume_submitting_pios(ploop); + ret = cmd.retval; err: if (cmd.resize.pio) @@ -626,8 +644,6 @@ static void process_merge_latest_snapshot_cmd(struct ploop *ploop, { unsigned int dst_cluster, *cluster = &cmd->merge.cluster; u8 level; - struct file *file; - int ret; if (cmd->retval) goto out; @@ -664,28 +680,14 @@ static void process_merge_latest_snapshot_cmd(struct ploop *ploop, return; } - if (cmd->retval == 0 && !cmd->merge.do_repeat) { - /* Delta merged. 
Release delta's file */ - ret = ploop_inflight_bios_ref_switch(ploop, true); - if (ret) { - cmd->retval = ret; - goto complete; - } - write_lock_irq(&ploop->bat_rwlock); - level = ploop->nr_deltas - 2; - file = ploop->deltas[level].file; - ploop->deltas[level] = ploop->deltas[level + 1]; - ploop->nr_deltas--; - write_unlock_irq(&ploop->bat_rwlock); - fput(file); - } -complete: complete(&cmd->comp); /* Last touch of cmd memory */ } static int ploop_merge_latest_snapshot(struct ploop *ploop) { struct ploop_cmd cmd; + struct file *file; + u8 level; int ret; if (ploop->maintaince) @@ -712,28 +714,37 @@ static int ploop_merge_latest_snapshot(struct ploop *ploop) wait_for_completion(&cmd.comp); } - if (cmd.retval == 0 && cmd.merge.do_repeat) + if (cmd.retval) + goto out; + + if (cmd.merge.do_repeat) goto again; + /* Delta merged. Release delta's file */ + cmd.retval = ploop_suspend_submitting_pios(ploop); + if (cmd.retval) + goto out; + + write_lock_irq(&ploop->bat_rwlock); + level = ploop->nr_deltas - 2; + file = ploop->deltas[level].file; + ploop->deltas[level] = ploop->deltas[level + 1]; + ploop->nr_deltas--; + write_unlock_irq(&ploop->bat_rwlock); + fput(file); + + ploop_resume_submitting_pios(ploop); +out: return cmd.retval; } -static void process_notify_delta_merged(struct ploop *ploop, - struct ploop_cmd *cmd) +static void notify_delta_merged(struct ploop *ploop, u8 level, + void *hdr, bool forward) { unsigned int i, end, *bat_entries, *delta_bat_entries; - void *hdr = cmd->notify_delta_merged.hdr; - u8 level = cmd->notify_delta_merged.level; struct rb_node *node; struct md_page *md; struct file *file; - int ret; - - ret = ploop_inflight_bios_ref_switch(ploop, true); - if (ret) { - cmd->retval = ret; - goto out; - } /* Points to hdr since md_page[0] also contains hdr. 
*/ delta_bat_entries = (map_index_t *)hdr; @@ -761,7 +772,7 @@ static void process_notify_delta_merged(struct ploop *ploop, */ bat_entries[i] = delta_bat_entries[i]; WARN_ON(bat_entries[i] == BAT_ENTRY_NONE); - if (!cmd->notify_delta_merged.forward) + if (!forward) md->bat_levels[i]--; } kunmap_atomic(bat_entries); @@ -775,23 +786,14 @@ static void process_notify_delta_merged(struct ploop *ploop, ploop->deltas[--ploop->nr_deltas].file = NULL; write_unlock_irq(&ploop->bat_rwlock); fput(file); - cmd->retval = 0; -out: - complete(&cmd->comp); /* Last touch of cmd memory */ } -static void process_update_delta_index(struct ploop *ploop, - struct ploop_cmd *cmd) +static int process_update_delta_index(struct ploop *ploop, u8 level, + const char *map) { - const char *map = cmd->update_delta_index.map; - u8 level = cmd->update_delta_index.level; unsigned int cluster, dst_cluster, n; int ret; - ret = ploop_inflight_bios_ref_switch(ploop, true); - if (ret) - goto out; - write_lock_irq(&ploop->bat_rwlock); /* Check all */ while (sscanf(map, "%u:%u;%n", &cluster, &dst_cluster, &n) == 2) { @@ -806,7 +808,6 @@ static void process_update_delta_index(struct ploop *ploop, goto unlock; } /* Commit all */ - map = cmd->update_delta_index.map; while (sscanf(map, "%u:%u;%n", &cluster, &dst_cluster, &n) == 2) { try_update_bat_entry(ploop, cluster, level, dst_cluster); map += n; @@ -814,15 +815,12 @@ static void process_update_delta_index(struct ploop *ploop, ret = 0; unlock: write_unlock_irq(&ploop->bat_rwlock); -out: - cmd->retval = ret; - complete(&cmd->comp); /* Last touch of cmd memory */ + return ret; } static int ploop_delta_clusters_merged(struct ploop *ploop, u8 level, bool forward) { - struct ploop_cmd cmd = { {0} }; void *d_hdr = NULL; struct file *file; int ret; @@ -834,16 +832,14 @@ static int ploop_delta_clusters_merged(struct ploop *ploop, u8 level, if (ret) goto out; - cmd.notify_delta_merged.level = level; - cmd.notify_delta_merged.hdr = d_hdr; - 
cmd.notify_delta_merged.forward = forward; - cmd.type = PLOOP_CMD_NOTIFY_DELTA_MERGED; - cmd.ploop = ploop; + ret = ploop_suspend_submitting_pios(ploop); + if (ret) + goto out; - init_completion(&cmd.comp); - ploop_queue_deferred_cmd(ploop, &cmd); - wait_for_completion(&cmd.comp); - ret = cmd.retval; + notify_delta_merged(ploop, level, d_hdr, forward); + + ploop_resume_submitting_pios(ploop); + ret = 0; out: vfree(d_hdr); return ret; @@ -916,25 +912,25 @@ static int ploop_get_delta_name_cmd(struct ploop *ploop, u8 level, static int ploop_update_delta_index(struct ploop *ploop, unsigned int level, const char *map) { - struct ploop_cmd cmd = { {0} }; + int ret; if (ploop->maintaince) return -EBUSY; if (level >= top_level(ploop)) return -ENOENT; - cmd.update_delta_index.level = level; - cmd.update_delta_index.map = map; - cmd.type = PLOOP_CMD_UPDATE_DELTA_INDEX; - cmd.ploop = ploop; + ret = ploop_suspend_submitting_pios(ploop); + if (ret) + goto out; - init_completion(&cmd.comp); - ploop_queue_deferred_cmd(ploop, &cmd); - wait_for_completion(&cmd.comp); - return cmd.retval; + ret = process_update_delta_index(ploop, level, map); + + ploop_resume_submitting_pios(ploop); +out: + return ret; } -static void process_flip_upper_deltas(struct ploop *ploop, struct ploop_cmd *cmd) +static int process_flip_upper_deltas(struct ploop *ploop) { unsigned int i, size, end, bat_clusters, hb_nr, *bat_entries; void *holes_bitmap = ploop->holes_bitmap; @@ -974,37 +970,21 @@ static void process_flip_upper_deltas(struct ploop *ploop, struct ploop_cmd *cmd /* FIXME */ swap(ploop->deltas[level], ploop->deltas[level+1]); write_unlock_irq(&ploop->bat_rwlock); - /* Device is suspended, but anyway... 
*/ - ploop_inflight_bios_ref_switch(ploop, false); - - cmd->retval = 0; - complete(&cmd->comp); /* Last touch of cmd memory */ + return 0; } -static void process_tracking_start(struct ploop *ploop, struct ploop_cmd *cmd) +static int process_tracking_start(struct ploop *ploop, void *tracking_bitmap, + u32 tb_nr) { - unsigned int i, nr_pages, end, *bat_entries, dst_cluster, tb_nr, nr; - void *tracking_bitmap = cmd->tracking_start.tracking_bitmap; + unsigned int i, nr_pages, end, *bat_entries, dst_cluster, nr; struct rb_node *node; struct md_page *md; int ret = 0; - tb_nr = cmd->tracking_start.tb_nr; - write_lock_irq(&ploop->bat_rwlock); ploop->tracking_bitmap = tracking_bitmap; ploop->tb_nr = tb_nr; - write_unlock_irq(&ploop->bat_rwlock); - /* - * Here we care about ploop_map() sees ploop->tracking_bitmap, - * since the rest of submitting are made from *this* kwork. - */ - ret = ploop_inflight_bios_ref_switch(ploop, true); - if (ret) - goto out; - - write_lock_irq(&ploop->bat_rwlock); for_each_clear_bit(i, ploop->holes_bitmap, ploop->hb_nr) set_bit(i, tracking_bitmap); nr_pages = bat_clu_to_page_nr(ploop->nr_bat_entries - 1) + 1; @@ -1030,10 +1010,9 @@ static void process_tracking_start(struct ploop *ploop, struct ploop_cmd *cmd) nr++; } write_unlock_irq(&ploop->bat_rwlock); + BUG_ON(ret == 0 && nr != nr_pages); -out: - cmd->retval = ret; - complete(&cmd->comp); /* Last touch of cmd memory */ + return ret; } static int tracking_get_next(struct ploop *ploop, char *result, @@ -1093,7 +1072,6 @@ static unsigned int max_dst_cluster_in_top_delta(struct ploop *ploop) static int ploop_tracking_cmd(struct ploop *ploop, const char *suffix, char *result, unsigned int maxlen) { - struct ploop_cmd cmd = { {0} }; void *tracking_bitmap = NULL; unsigned int tb_nr, size; int ret = 0; @@ -1128,16 +1106,15 @@ static int ploop_tracking_cmd(struct ploop *ploop, const char *suffix, return -ENOMEM; ploop->tb_cursor = tb_nr - 1; - cmd.type = PLOOP_CMD_TRACKING_START; - cmd.ploop = ploop; - 
cmd.tracking_start.tracking_bitmap = tracking_bitmap; - cmd.tracking_start.tb_nr = tb_nr; + ret = ploop_suspend_submitting_pios(ploop); + if (ret) + return ret; - init_completion(&cmd.comp); - ploop_queue_deferred_cmd(ploop, &cmd); - wait_for_completion(&cmd.comp); ploop->maintaince = true; - ret = cmd.retval; + ret = process_tracking_start(ploop, tracking_bitmap, tb_nr); + + ploop_resume_submitting_pios(ploop); + if (ret) goto stop; } else if (!strcmp(suffix, "stop")) { @@ -1177,12 +1154,8 @@ static int ploop_set_noresume(struct ploop *ploop, char *mode) static int ploop_flip_upper_deltas(struct ploop *ploop) { struct dm_target *ti = ploop->ti; - struct ploop_cmd cmd = { {0} }; struct file *file; - cmd.type = PLOOP_CMD_FLIP_UPPER_DELTAS; - cmd.ploop = ploop; - if (!dm_suspended(ti) || !ploop->noresume || ploop->maintaince) return -EBUSY; if (ploop_is_ro(ploop)) @@ -1195,11 +1168,7 @@ static int ploop_flip_upper_deltas(struct ploop *ploop) if (!(file->f_mode & FMODE_WRITE)) return -EACCES; - init_completion(&cmd.comp); - ploop_queue_deferred_cmd(ploop, &cmd); - wait_for_completion(&cmd.comp); - - return cmd.retval; + return process_flip_upper_deltas(ploop); } /* Handle user commands requested via "message" interface */ @@ -1222,14 +1191,6 @@ void process_deferred_cmd(struct ploop *ploop, struct ploop_index_wb *piwb) process_resize_cmd(ploop, piwb, cmd); } else if (cmd->type == PLOOP_CMD_MERGE_SNAPSHOT) { process_merge_latest_snapshot_cmd(ploop, cmd); - } else if (cmd->type == PLOOP_CMD_NOTIFY_DELTA_MERGED) { - process_notify_delta_merged(ploop, cmd); - } else if (cmd->type == PLOOP_CMD_UPDATE_DELTA_INDEX) { - process_update_delta_index(ploop, cmd); - } else if (cmd->type == PLOOP_CMD_TRACKING_START) { - process_tracking_start(ploop, cmd); - } else if (cmd->type == PLOOP_CMD_FLIP_UPPER_DELTAS) { - process_flip_upper_deltas(ploop, cmd); } else { cmd->retval = -EINVAL; complete(&cmd->comp); diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c index 
73d44122361b..c1c1c2a4538d 100644 --- a/drivers/md/dm-ploop-map.c +++ b/drivers/md/dm-ploop-map.c @@ -974,8 +974,6 @@ static bool ploop_data_pio_end(struct pio *pio) unsigned long flags; bool completed; - dec_nr_inflight(piwb->ploop, pio); - spin_lock_irqsave(&piwb->lock, flags); completed = piwb->completed; if (!completed) @@ -1134,7 +1132,6 @@ static void ploop_cow_endio(struct pio *cluster_pio, void *data, blk_status_t bi list_add_tail(&cluster_pio->list, &ploop->delta_cow_action_list); spin_unlock_irqrestore(&ploop->deferred_lock, flags); - dec_nr_inflight(ploop, &cow->aux_pio); queue_work(ploop->wq, &ploop->worker); } @@ -1226,9 +1223,6 @@ static void submit_cluster_write(struct ploop_cow *cow) pio_prepare_offsets(ploop, pio, dst_cluster); BUG_ON(irqs_disabled()); - read_lock_irq(&ploop->bat_rwlock); - inc_nr_inflight(ploop, &cow->aux_pio); - read_unlock_irq(&ploop->bat_rwlock); pio->endio_cb = ploop_cow_endio; pio->endio_cb_data = cow; @@ -1439,11 +1433,6 @@ static int process_one_deferred_bio(struct ploop *ploop, struct pio *pio, if (!ret) goto out; queue: - /* To improve: read lock may be avoided */ - read_lock_irq(&ploop->bat_rwlock); - inc_nr_inflight(ploop, pio); - read_unlock_irq(&ploop->bat_rwlock); - link_submitting_pio(ploop, pio, cluster); submit_rw_mapped(ploop, dst_cluster, pio); @@ -1630,11 +1619,12 @@ static noinline struct bio_vec *create_bvec_from_rq(struct request *rq) return bvec; } -static noinline void submit_pio(struct ploop *ploop, struct pio *pio) +static void submit_pio(struct ploop *ploop, struct pio *pio) { struct list_head *queue_list; struct work_struct *worker; unsigned long flags; + bool queue = true; LIST_HEAD(list); int ret; @@ -1661,9 +1651,19 @@ static noinline void submit_pio(struct ploop *ploop, struct pio *pio) list_add(&pio->list, &list); spin_lock_irqsave(&ploop->deferred_lock, flags); + if (unlikely(ploop->stop_submitting_pios)) { + list_splice_tail(&list, &ploop->delayed_pios); + queue = false; + goto unlock; + } + + 
inc_nr_inflight(ploop, pio); list_splice_tail(&list, queue_list); +unlock: spin_unlock_irqrestore(&ploop->deferred_lock, flags); - queue_work(ploop->wq, worker); + + if (queue) + queue_work(ploop->wq, worker); return; kill: pio->bi_status = BLK_STS_IOERR; @@ -1671,6 +1671,14 @@ static noinline void submit_pio(struct ploop *ploop, struct pio *pio) pio_endio(pio); } +void submit_pios(struct ploop *ploop, struct list_head *list) +{ + struct pio *pio; + + while ((pio = pio_list_pop(list)) != NULL) + submit_pio(ploop, pio); +} + int ploop_clone_and_map(struct dm_target *ti, struct request *rq, union map_info *info, struct request **clone) { diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c index 692d7c6a52d4..5af682dc2ea4 100644 --- a/drivers/md/dm-ploop-target.c +++ b/drivers/md/dm-ploop-target.c @@ -307,6 +307,7 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&ploop->inflight_lock); spin_lock_init(&ploop->deferred_lock); + INIT_LIST_HEAD(&ploop->delayed_pios); INIT_LIST_HEAD(&ploop->deferred_pios); INIT_LIST_HEAD(&ploop->flush_pios); INIT_LIST_HEAD(&ploop->discard_pios); diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h index bc2090274705..ce9c568779f9 100644 --- a/drivers/md/dm-ploop.h +++ b/drivers/md/dm-ploop.h @@ -43,10 +43,6 @@ struct ploop_delta { struct ploop_cmd { #define PLOOP_CMD_RESIZE 1 #define PLOOP_CMD_MERGE_SNAPSHOT 3 -#define PLOOP_CMD_NOTIFY_DELTA_MERGED 4 -#define PLOOP_CMD_UPDATE_DELTA_INDEX 6 -#define PLOOP_CMD_TRACKING_START 7 -#define PLOOP_CMD_FLIP_UPPER_DELTAS 8 struct completion comp; struct ploop *ploop; unsigned int type; @@ -73,19 +69,6 @@ struct ploop_cmd { unsigned int cluster; /* Currently iterated cluster */ bool do_repeat; } merge; - struct { - void *hdr; /* hdr and bat_entries consequentially */ - u8 level; - bool forward; - } notify_delta_merged; - struct { - u8 level; - const char *map; - } update_delta_index; - struct { - void *tracking_bitmap; - unsigned 
int tb_nr; - } tracking_start; }; }; @@ -179,6 +162,9 @@ struct ploop { bool inflight_ref_comp_pending; unsigned int inflight_bios_ref_index:1; + struct list_head delayed_pios; + bool stop_submitting_pios; + spinlock_t inflight_lock; spinlock_t deferred_lock; struct list_head deferred_pios; @@ -504,6 +490,7 @@ extern bool try_update_bat_entry(struct ploop *ploop, unsigned int cluster, extern int convert_bat_entries(u32 *bat_entries, u32 count); extern int ploop_add_delta(struct ploop *ploop, u32 level, struct file *file, bool is_raw); +extern void submit_pios(struct ploop *ploop, struct list_head *list); extern void defer_pios(struct ploop *ploop, struct pio *pio, struct list_head *pio_list); extern void do_ploop_work(struct work_struct *ws); extern void do_ploop_fsync_work(struct work_struct *ws); _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
