[Devel] [PATCH 10/10] ploop: Populate and maintain holes bitmap
Holes bitmap is needed for allocation of next free cluster. Otherwise we don't know, where cluster number should be taken. TODO: Flag to handle broken allocs (set bit back) Signed-off-by: Kirill Tkhai --- drivers/block/ploop/fmt_ploop1.c | 199 +- drivers/block/ploop/map.c|6 + include/linux/ploop/ploop.h |3 + 3 files changed, 205 insertions(+), 3 deletions(-) diff --git a/drivers/block/ploop/fmt_ploop1.c b/drivers/block/ploop/fmt_ploop1.c index 9cfc56296e5e..6212634a1c0e 100644 --- a/drivers/block/ploop/fmt_ploop1.c +++ b/drivers/block/ploop/fmt_ploop1.c @@ -23,6 +23,7 @@ struct ploop1_private u64 bd_size; u32 alloc_head; sector_tl1_off; + u32 nr_bat_entries; }; static unsigned int data_off_in_clusters(struct ploop_delta *delta) @@ -78,6 +79,11 @@ static int ploop1_stop(struct ploop_delta * delta) struct ploop_pvd_header *vh; struct ploop1_private * ph = delta->priv; + if (delta->holes_bitmap) { + kvfree(delta->holes_bitmap); + delta->holes_bitmap = NULL; + } + if ((delta->flags & PLOOP_FMT_RDONLY) || test_bit(PLOOP_S_ABORT, >plo->state)) return 0; @@ -115,6 +121,70 @@ ploop1_compose(struct ploop_delta * delta, int nchunks, struct ploop_ctl_chunk * return ploop_io_init(delta, nchunks, pc); } +static int populate_holes_bitmap(struct ploop_delta *delta, +struct ploop1_private *ph) +{ + unsigned int block, nr_blocks, size, off; + struct page *page; + sector_t sec; + u32 *index; + int i, ret; + + if (test_bit(PLOOP_S_NO_FALLOC_DISCARD, >plo->state)) + return 0; + + /* To do: add discard alignment for v1 */ + if (delta->plo->fmt_version != PLOOP_FMT_V2) + return 0; + + ret = -ENOMEM; + page = alloc_page(GFP_KERNEL); + if (!page) + return ret; + + nr_blocks = ph->nr_bat_entries; + /* Bits to bytes */ + size = DIV_ROUND_UP(nr_blocks, 8); + + delta->holes_bitmap = kvmalloc(size, GFP_KERNEL); + if (!delta->holes_bitmap) + goto put_page; + memset(delta->holes_bitmap, 0xff, size); + for (i = nr_blocks; i < size * 8; i++) + clear_bit(i, delta->holes_bitmap); + + block = 0; + 
while (block < nr_blocks) { + if (!ploop1_map_index(delta, block, )) { + WARN_ONCE(1, "Can't map block\n"); + goto put_page; + } + ret = delta->io.ops->sync_read(>io, page, + 4096, 0, sec); + if (ret) + goto put_page; + + off = block ? 0 : PLOOP_MAP_OFFSET; + + index = page_address(page); + for (i = off; i < INDEX_PER_PAGE && block + i - off < nr_blocks; i++) { + if (index[i] != 0) { + clear_bit((index[i] >> ploop_map_log(delta->plo)) - + data_off_in_clusters(delta), + delta->holes_bitmap); + } + } + + block += (block ? INDEX_PER_PAGE : INDEX_PER_PAGE - PLOOP_MAP_OFFSET); + } + + ret = 0; + +put_page: + put_page(page); + return ret; +} + static int ploop1_open(struct ploop_delta * delta) { @@ -157,6 +227,7 @@ ploop1_open(struct ploop_delta * delta) goto out_err; ph->l1_off = le32_to_cpu(vh->m_FirstBlockOffset); + ph->nr_bat_entries = le32_to_cpu(vh->m_Size); err = -EBUSY; if (pvd_header_is_disk_in_use(vh)) @@ -237,12 +308,37 @@ static void ploop1_allocate(struct ploop_delta * delta, struct ploop_request * preq, struct bio_list * sbl, unsigned int size) { - if (delta->io.alloc_head >= + struct ploop1_private * ph = delta->priv; + cluster_t cluster = 0; + int ret; + + if (delta->holes_bitmap) { + unsigned nr_clusters = ph->nr_bat_entries; + + cluster = find_first_bit(delta->holes_bitmap, nr_clusters); + if (cluster >= nr_clusters) { + PLOOP_FAIL_REQUEST(preq, -ENOSPC); + return; + } + cluster += data_off_in_clusters(delta); + } else if (delta->io.alloc_head >= (delta->max_delta_size >> delta->cluster_log)) { PLOOP_FAIL_REQUEST(preq, -E2BIG); return; } - ploop_submit_alloc(delta, preq, sbl, size, 0); + + ret = ploop_submit_alloc(delta, preq, sbl, size, cluster); + + if (ret == 1 && cluster) { + /* Success. Mark cluster as occupied */ + cluster -=
[Devel] [PATCH 09/10] ploop: Fallocate cluster in cached_submit() during hole reuse
__map_extent_bmap() is for raw format, when we don't have information about presence of a cluster. Ploop1 must allocate all the space in beginning of cached_submit() function. Otherwise, we can't control what is going on. Signed-off-by: Kirill Tkhai --- drivers/block/ploop/io_direct.c | 27 +++ drivers/block/ploop/io_direct_map.c | 11 +++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index ad95cce91f6e..1667989c9c3b 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -389,21 +389,29 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq, loff_t pos, end_pos, start, end; loff_t clu_siz = cluster_size_in_bytes(plo); struct bio_iter biter; - loff_t new_size; + loff_t new_size, prealloc; loff_t used_pos; bool may_fallocate = dio_may_fallocate(io); + bool reusing, once = true; trace_cached_submit(preq); pos = (loff_t)iblk << (plo->cluster_log + 9); end_pos = pos + clu_siz; used_pos = (loff_t)(io->alloc_head - 1) << (io->plo->cluster_log + 9); + reusing = (end_pos <= used_pos); + + if (reusing) { + /* Reusing a hole */ + prealloc = clu_siz; + goto try_again; + } file_start_write(io->files.file); - if (use_prealloc && end_pos > used_pos && may_fallocate) { + if (use_prealloc && (end_pos > used_pos) && may_fallocate) { if (unlikely(io->prealloced_size < used_pos + clu_siz)) { - loff_t prealloc = end_pos; + prealloc = end_pos; if (prealloc > PLOOP_MAX_PREALLOC(plo)) prealloc = PLOOP_MAX_PREALLOC(plo); try_again: @@ -423,7 +431,8 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq, if (err) goto end_write; - io->prealloced_size = pos + prealloc; + if (io->prealloced_size < pos + prealloc) + io->prealloced_size = pos + prealloc; } } @@ -434,6 +443,16 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq, if (unlikely(IS_ERR(em))) { err = PTR_ERR(em); + if (err == -ENOENT && once) { 
+ /* +* Boundary cluster: temporary crutch +* before io->alloc_head is reworked +* to not be incremented in caller. +*/ + once = false; + prealloc = clu_siz; + goto try_again; + } goto end_write; } diff --git a/drivers/block/ploop/io_direct_map.c b/drivers/block/ploop/io_direct_map.c index 7934bc8ecfb1..84a3986b8769 100644 --- a/drivers/block/ploop/io_direct_map.c +++ b/drivers/block/ploop/io_direct_map.c @@ -703,7 +703,18 @@ static struct extent_map *__map_extent_bmap(struct ploop_io *io, } if (fieinfo.fi_extents_mapped != 1) { + struct ploop_device *plo = io->plo; ploop_extent_put(em); + /* +* In case of io_direct we may support discards +* in multi-delta case, since all allocated blocks +* are added to extent tree. But we follow generic +* way, and encode discarded blocks by zeroing +* their indexes in maps (ploop1). +*/ + if (!test_bit(PLOOP_MAP_IDENTICAL, >map.flags)) + return ERR_PTR(-ENOENT); + ret = fallocate_cluster(io, inode, start_off, len, align_to_clu); if (!ret) goto again; ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 07/10] ploop: Add .complete_merge method
It will be used to reallocate the holes bitmap after merge. Signed-off-by: Kirill Tkhai --- drivers/block/ploop/dev.c | 17 - drivers/block/ploop/io_direct_map.c |2 ++ include/linux/ploop/ploop.h |1 + 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 57920a93129a..618d22bf02ec 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -3762,18 +3762,25 @@ static void ploop_update_fmt_version(struct ploop_device * plo) } } -static void ploop_merge_cleanup(struct ploop_device * plo, - struct ploop_map * map, - struct ploop_delta * delta, int err) +static void ploop_merge_complete(struct ploop_device * plo, +struct ploop_map * map, +struct ploop_delta * delta, int err) { + struct ploop_delta *top_delta; + ploop_quiesce(plo); mutex_lock(>sysfs_mutex); list_del(>list); if (err) list_add(>list, >map.delta_list); - else + else { + top_delta = ploop_top_delta(plo); + if (top_delta->ops->complete_merge) + /* FIXME: How should we handle error here? 
*/ + top_delta->ops->complete_merge(top_delta); ploop_update_fmt_version(plo); + } plo->trans_map = NULL; plo->maintenance_type = PLOOP_MNTN_OFF; @@ -3873,7 +3880,7 @@ static int ploop_merge(struct ploop_device * plo) err = -EIO; } - ploop_merge_cleanup(plo, map, delta, err); + ploop_merge_complete(plo, map, delta, err); if (!err) { kobject_del(>kobj); diff --git a/drivers/block/ploop/io_direct_map.c b/drivers/block/ploop/io_direct_map.c index bc65e60e72a3..7934bc8ecfb1 100644 --- a/drivers/block/ploop/io_direct_map.c +++ b/drivers/block/ploop/io_direct_map.c @@ -620,6 +620,8 @@ static int fallocate_cluster(struct ploop_io *io, struct inode *inode, } ret = file->f_op->fallocate(file, FALLOC_FL_KEEP_SIZE, start_off, len); + if (!ret) + ret = io->ops->sync(io); out: return ret; } diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h index 73141c536f65..a730c2de7873 100644 --- a/include/linux/ploop/ploop.h +++ b/include/linux/ploop/ploop.h @@ -283,6 +283,7 @@ struct ploop_delta_ops int (*complete_snapshot)(struct ploop_delta *, struct ploop_snapdata *); int (*prepare_merge)(struct ploop_delta *, struct ploop_snapdata *); int (*start_merge)(struct ploop_delta *, struct ploop_snapdata *); + int (*complete_merge)(struct ploop_delta *); int (*truncate)(struct ploop_delta *, struct file *, __u32 alloc_head); int (*prepare_grow)(struct ploop_delta *, u64 *new_size, int *reloc); int (*complete_grow)(struct ploop_delta *, u64 new_size); ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 08/10] ploop: Zero indexes on discard
Switch preq into PLOOP_E_DATA_WBI state to continue execution after discard's write is finished. Zero index in that stage. Signed-off-by: Kirill Tkhai --- drivers/block/ploop/dev.c|5 - drivers/block/ploop/fmt_ploop1.c |5 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 618d22bf02ec..3ada41f91867 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -2461,7 +2461,10 @@ ploop_entry_request(struct ploop_request * preq) spin_unlock_irq(>lock); } preq->iblock = iblk; - preq->eng_state = PLOOP_E_COMPLETE; + if (!(preq->req_rw & REQ_DISCARD)) + preq->eng_state = PLOOP_E_COMPLETE; + else + preq->eng_state = PLOOP_E_DATA_WBI; __TRACE("T %p %u\n", preq, preq->req_cluster); plo->st.bio_out++; delta->io.ops->submit(>io, preq, preq->req_rw, diff --git a/drivers/block/ploop/fmt_ploop1.c b/drivers/block/ploop/fmt_ploop1.c index 370aa6369a8a..9cfc56296e5e 100644 --- a/drivers/block/ploop/fmt_ploop1.c +++ b/drivers/block/ploop/fmt_ploop1.c @@ -250,6 +250,11 @@ ploop1_allocate(struct ploop_delta * delta, struct ploop_request * preq, static void ploop1_allocate_complete(struct ploop_delta * delta, struct ploop_request * preq) { + if ((preq->req_rw & REQ_DISCARD) && + !test_bit(PLOOP_REQ_DISCARD, >state)) { + preq->iblock = 0; + } + ploop_index_update(preq); } ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 02/10] ploop: Make submit_alloc() return int value
-1 is error; 0 means this service preq is skipped; 1 is block was allocated Signed-off-by: Kirill Tkhai --- drivers/block/ploop/io_direct.c |6 -- drivers/block/ploop/io_kaio.c | 11 ++- include/linux/ploop/ploop.h |2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index 21cfdeb7441b..b1955d339de5 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -730,7 +730,7 @@ dio_submit_pad(struct ploop_io *io, struct ploop_request * preq, PLOOP_FAIL_REQUEST(preq, err); } -static void +static int dio_submit_alloc(struct ploop_io *io, struct ploop_request * preq, struct bio_list * sbl, unsigned int size) { @@ -741,7 +741,7 @@ dio_submit_alloc(struct ploop_io *io, struct ploop_request * preq, if (!(io->files.file->f_mode & FMODE_WRITE)) { PLOOP_FAIL_REQUEST(preq, -EBADF); - return; + return -1; } err = cached_submit(io, iblk, preq, sbl, size, true); @@ -749,8 +749,10 @@ dio_submit_alloc(struct ploop_io *io, struct ploop_request * preq, if (err == -ENOSPC) io->alloc_head--; PLOOP_FAIL_REQUEST(preq, err); + return -1; } preq->eng_state = PLOOP_E_DATA_WBI; + return 1; } /* When backing fs does not export any method to allocate new blocks diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c index 84da1cec5643..1e4ae6efbe70 100644 --- a/drivers/block/ploop/io_kaio.c +++ b/drivers/block/ploop/io_kaio.c @@ -546,7 +546,7 @@ static int kaio_fsync_thread(void * data) return 0; } -static void +static int kaio_submit_alloc(struct ploop_io *io, struct ploop_request * preq, struct bio_list * sbl, unsigned int size) { @@ -558,7 +558,7 @@ kaio_submit_alloc(struct ploop_io *io, struct ploop_request * preq, if (delta->flags & PLOOP_FMT_RDONLY) { PLOOP_FAIL_REQUEST(preq, -EBADF); - return; + return -1; } iblk = io->alloc_head; @@ -568,7 +568,7 @@ kaio_submit_alloc(struct ploop_io *io, struct ploop_request * preq, kaio_queue_fsync_req(preq); 
io->plo->st.bio_syncwait++; spin_unlock_irq(>plo->lock); - return; + return 0; } BUG_ON(preq->prealloc_size); @@ -586,11 +586,11 @@ kaio_submit_alloc(struct ploop_io *io, struct ploop_request * preq, kaio_queue_trunc_req(preq); io->plo->st.bio_syncwait++; spin_unlock_irq(>plo->lock); - return; + return 0; } else { /* we're not first */ list_add_tail(>list, >prealloc_preq->delay_list); - return; + return 0; } } @@ -600,6 +600,7 @@ kaio_submit_alloc(struct ploop_io *io, struct ploop_request * preq, preq->eng_state = PLOOP_E_DATA_WBI; kaio_sbl_submit(io->files.file, preq, REQ_WRITE, sbl, iblk, size); + return 1; } static int kaio_release_prealloced(struct ploop_io * io) diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h index b209c9f096a8..599f7dbff7f2 100644 --- a/include/linux/ploop/ploop.h +++ b/include/linux/ploop/ploop.h @@ -159,7 +159,7 @@ struct ploop_io_ops void(*submit)(struct ploop_io *, struct ploop_request *, unsigned long rw, struct bio_list *sbl, iblock_t iblk, unsigned int size); - void(*submit_alloc)(struct ploop_io *, struct ploop_request *, + int (*submit_alloc)(struct ploop_io *, struct ploop_request *, struct bio_list *sbl, unsigned int size); void(*post_submit)(struct ploop_io *, struct ploop_request *); ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 01/10] ploop: Export map defines to separate header file
Signed-off-by: Kirill Tkhai --- drivers/block/ploop/fmt_ploop1.c |4 +-- drivers/block/ploop/map.c| 52 +-- drivers/block/ploop/map.h| 56 ++ 3 files changed, 58 insertions(+), 54 deletions(-) create mode 100644 drivers/block/ploop/map.h diff --git a/drivers/block/ploop/fmt_ploop1.c b/drivers/block/ploop/fmt_ploop1.c index 1281a343bff4..382737b4cb6c 100644 --- a/drivers/block/ploop/fmt_ploop1.c +++ b/drivers/block/ploop/fmt_ploop1.c @@ -12,13 +12,11 @@ #include #include "ploop1_image.h" +#include "map.h" /* The implementaion of ploop1 (PVD) delta format, defined in ploop1_fmt.h */ -#define INDEX_PER_PAGE (PAGE_SIZE / 4) -#define INDEX_PER_PAGE_SHIFT (PAGE_SHIFT - 2) - struct ploop1_private { struct page *dyn_page; diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c index 7b08001aa58b..8a28148c17cb 100644 --- a/drivers/block/ploop/map.c +++ b/drivers/block/ploop/map.c @@ -40,12 +40,7 @@ #include #include - -/* This defines slot in mapping page. Right now it is 32 bit - * and therefore it directly matches ploop1 structure. */ -typedef u32 map_index_t; - -#define INDEX_PER_PAGE (PAGE_SIZE / sizeof(map_index_t)) +#include "map.h" static struct kmem_cache * ploop_map_cache; @@ -53,51 +48,6 @@ static LIST_HEAD(map_lru); static DEFINE_SPINLOCK(map_lru_lock); static atomic_t map_pages_nr = ATOMIC_INIT(0); -/* - * Additional information for each page is: - * 1. rb tree link - * 2. Page - * 3. mn_start, mn_end - the first and the last index - * (correspondingly) the page maps to iblocks. - * 4. lru linkage - * 5. delta level of whole page, it is delta, where this page - *is backed. - * 6. Array of delta levels for each map_index in the page. - *If page is backed at level N, those levels cannot be >N. - *If all the levels == N, array of levels is not allocated. - *When at least one level < N, it is stored in the array. - *Note, that in this case exporting page to disk implies - *clearing irrelevant entries. 
- */ - -struct map_node -{ - struct rb_node rb_link; - cluster_t mn_start; - cluster_t mn_end; - unsigned long state; - atomic_trefcnt; - struct ploop_map*parent; - - struct page *page; - struct list_headlru; - u8 *levels; - - /* List of preq's blocking on this mapping. -* -* We queue here several kinds of requests: -* 1. If mapping is not uptodate, all the requests which need -*this mapping are queued here. preq state is ENTRY. -* 2. If preq requires index update and it is delayed -*because writeback is in progress. preq state is INDEX_DELAY, -*new index is kept in preq->iblock. -* 3. If preq's started index update, preq state is INDEX_WB, -*new indices are sent to io, but they are not inserted -*into mapping until writeback is complete. -*/ - struct list_headio_queue; -}; - cluster_t map_get_mn_end(struct map_node *m) { return m->mn_end; diff --git a/drivers/block/ploop/map.h b/drivers/block/ploop/map.h new file mode 100644 index ..580f23ccfbb0 --- /dev/null +++ b/drivers/block/ploop/map.h @@ -0,0 +1,56 @@ +#ifndef __PLOOP_MAP_H +#define __PLOOP_MAP_H + +/* This defines slot in mapping page. Right now it is 32 bit + * and therefore it directly matches ploop1 structure. */ +typedef u32 map_index_t; + +#define INDEX_PER_PAGE (PAGE_SIZE / sizeof(map_index_t)) +#define INDEX_PER_PAGE_SHIFT (PAGE_SHIFT - 2) + +/* + * Additional information for each page is: + * 1. rb tree link + * 2. Page + * 3. mn_start, mn_end - the first and the last index + * (correspondingly) the page maps to iblocks. + * 4. lru linkage + * 5. delta level of whole page, it is delta, where this page + *is backed. + * 6. Array of delta levels for each map_index in the page. + *If page is backed at level N, those levels cannot be >N. + *If all the levels == N, array of levels is not allocated. + *When at least one level < N, it is stored in the array. + *Note, that in this case exporting page to disk implies + *clearing irrelevant entries. 
+ */ + +struct map_node +{ + struct rb_node rb_link; + cluster_t mn_start; + cluster_t mn_end; + unsigned long state; + atomic_trefcnt; + struct ploop_map*parent; + + struct page *page; + struct list_headlru; + u8 *levels; + + /* List of preq's blocking on this mapping. +* +* We queue here several kinds of requests: +* 1. If mapping is not uptodate, all the requests which need +*this mapping are
[Devel] [PATCH 04/10] ploop: Prohibit discard ioctls
Prohibit maintenance mode related ioctls. Signed-off-by: Kirill Tkhai --- drivers/block/ploop/dev.c | 10 ++ drivers/block/ploop/discard.c |6 ++ 2 files changed, 16 insertions(+) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index f8b9b0bcba78..11bc864a5679 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -4522,6 +4522,8 @@ static int ploop_freeblks_ioc(struct ploop_device *plo, unsigned long arg) int i; int rc = 0; + return -EINVAL; + if (list_empty(>map.delta_list)) return -ENOENT; @@ -4605,6 +4607,8 @@ static int ploop_fbget_ioc(struct ploop_device *plo, unsigned long arg) struct ploop_freeblks_ctl ctl; int rc = 0; + return -EINVAL; + if (list_empty(>map.delta_list)) return -ENOENT; @@ -4629,6 +4633,8 @@ static int ploop_fbfilter_ioc(struct ploop_device *plo, unsigned long arg) { int rc = 0; + return -EINVAL; + if (plo->maintenance_type != PLOOP_MNTN_DISCARD || !test_bit(PLOOP_S_DISCARD_LOADED, >state)) return -EINVAL; @@ -4716,6 +4722,8 @@ static void ploop_discard_restart(struct ploop_device *plo, int err) static int ploop_fbdrop_ioc(struct ploop_device *plo) { + return -EINVAL; + if (list_empty(>map.delta_list)) return -ENOENT; @@ -4742,6 +4750,8 @@ static int ploop_relocblks_ioc(struct ploop_device *plo, unsigned long arg) int err = 0; int n_free; + return -EINVAL; + if (list_empty(>map.delta_list)) return -ENOENT; diff --git a/drivers/block/ploop/discard.c b/drivers/block/ploop/discard.c index 33122497b9c5..1920ab05e20b 100644 --- a/drivers/block/ploop/discard.c +++ b/drivers/block/ploop/discard.c @@ -17,6 +17,8 @@ int ploop_discard_init_ioc(struct ploop_device *plo) struct ploop_freeblks_desc *fbd; struct ploop_delta *delta = ploop_top_delta(plo); + return -EINVAL; + if (delta == NULL) return -EINVAL; @@ -52,6 +54,8 @@ int ploop_discard_fini_ioc(struct ploop_device *plo) struct ploop_request *preq, *tmp; LIST_HEAD(drop_list); + return -EINVAL; + if (!test_and_clear_bit(PLOOP_S_DISCARD, >state)) return 0; @@ -90,6 
+94,8 @@ int ploop_discard_wait_ioc(struct ploop_device *plo) { int err; + return -EINVAL; + if (!test_bit(PLOOP_S_DISCARD, >state)) return 0; ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 06/10] ploop: Introduce data_off_in_clusters() helper
Signed-off-by: Kirill Tkhai --- drivers/block/ploop/fmt_ploop1.c |7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/block/ploop/fmt_ploop1.c b/drivers/block/ploop/fmt_ploop1.c index eea15aac8790..370aa6369a8a 100644 --- a/drivers/block/ploop/fmt_ploop1.c +++ b/drivers/block/ploop/fmt_ploop1.c @@ -25,6 +25,13 @@ struct ploop1_private sector_tl1_off; }; +static unsigned int data_off_in_clusters(struct ploop_delta *delta) +{ + struct ploop1_private *ph = delta->priv; + + return (ph->l1_off >> delta->cluster_log); +} + int ploop1_map_index(struct ploop_delta * delta, unsigned long block, sector_t *sec) { struct ploop1_private * ph = delta->priv; ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 00/10] ploop: Discard with zeroing of ploop1 indexes support
--- Kirill Tkhai (10): ploop: Export map defines to separate header file ploop: Make submit_alloc() return int value ploop: Introduce ploop_submit_alloc() helper ploop: Prohibit discard ioctls ploop: Prohibit PLOOP_IOC_UPDATE_INDEX on singular list ploop: Introduce data_off_in_clusters() helper ploop: Add .complete_merge method ploop: Zero indexes on discard ploop: Fallocate cluster in cached_submit() during hole reuse ploop: Populate and maintain holes bitmap drivers/block/ploop/dev.c | 41 ++- drivers/block/ploop/discard.c |6 + drivers/block/ploop/fmt_ploop1.c| 215 ++- drivers/block/ploop/fmt_raw.c |2 drivers/block/ploop/io.c| 39 ++ drivers/block/ploop/io_direct.c | 43 --- drivers/block/ploop/io_direct_map.c | 13 ++ drivers/block/ploop/io_kaio.c | 22 +--- drivers/block/ploop/map.c | 58 + drivers/block/ploop/map.h | 56 + include/linux/ploop/ploop.h | 11 +- 11 files changed, 406 insertions(+), 100 deletions(-) create mode 100644 drivers/block/ploop/map.h -- Signed-off-by: Kirill Tkhai ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 05/10] ploop: Prohibit PLOOP_IOC_UPDATE_INDEX on singular list
Add debug check to not miss a moment userspace decides to change its behavior. Signed-off-by: Kirill Tkhai --- drivers/block/ploop/dev.c |9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 11bc864a5679..57920a93129a 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -4277,7 +4277,14 @@ static int ploop_index_update_ioc(struct ploop_device *plo, unsigned long arg) struct reloc_map *map; int i; - if (list_empty(>map.delta_list)) + /* +* Currently this is used by userspace for relocation +* of start blocks before expanding BAT of !top delta. +* In case of someone wants to do that on a singular +* list, he should expand holes bitmap too. +*/ + if (list_empty(>map.delta_list) || + list_is_singular(>map.delta_list)) return -ENOENT; if (copy_from_user(, (void*)arg, ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 0/3] fix jumbo chunk warning
Initially the warning was added incorrectly due to unsynchronization of pcs_mds_sys_info structure with the userspace client. Let's sync the structure and fix that. Pavel Butsykin (3): Revert "fs/fuse kio: add warning about jumbo chunks" fs/fuse kio: sync pcs_mds_sys_info struct fs/fuse kio: add warning about jumbo chunks fs/fuse/kio/pcs/pcs_client_types.h | 6 +++--- fs/fuse/kio/pcs/pcs_cluster.c | 8 fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 4 ++-- fs/fuse/kio/pcs/pcs_mds_prot.h | 2 -- fs/fuse/kio/pcs/pcs_prot_types.h | 10 ++ 5 files changed, 15 insertions(+), 15 deletions(-) -- 2.15.1 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 2/3] fs/fuse kio: sync pcs_mds_sys_info struct
For some reason pcs_mds_sys_info structure is different in the kernel and userspace. Let's synchronize it to avoid inaccuracies and discrepancies in the future. Signed-off-by: Pavel Butsykin --- fs/fuse/kio/pcs/pcs_client_types.h | 6 +++--- fs/fuse/kio/pcs/pcs_cluster.c | 8 fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 2 +- fs/fuse/kio/pcs/pcs_prot_types.h | 10 ++ 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/fuse/kio/pcs/pcs_client_types.h b/fs/fuse/kio/pcs/pcs_client_types.h index 1be32cbbf285..c0d14ec5b5d4 100644 --- a/fs/fuse/kio/pcs/pcs_client_types.h +++ b/fs/fuse/kio/pcs/pcs_client_types.h @@ -87,9 +87,9 @@ static inline void pcs_set_fileinfo(struct pcs_dentry_info *i, const struct pcs_ if (mi->sys.stripe_depth == 0) { mi->sys.stripe_depth = 1; - mi->sys.strip_width = mi->sys.chunk_size; + mi->sys.strip_width = mi->sys.chunk_size_lo; } - i->mapping.chunk_size_bits = ilog2(mi->sys.chunk_size); + i->mapping.chunk_size_bits = ilog2(mi->sys.chunk_size_lo); } @@ -170,7 +170,7 @@ typedef struct _pcs_api_csconnreq_t { #define DENTRY_ARGS(de) PCS_FILE_ID_ARGS((de)->id.parent), PCS_FILE_ID_ARGS((de)->fileinfo.attr.id) #define DENTRY_SIZE(de) ((de)->fileinfo.attr.size) -#define DENTRY_CHUNK_SIZE(de) ((de)->fileinfo.sys.chunk_size) +#define DENTRY_CHUNK_SIZE(de) ((de)->fileinfo.sys.chunk_size_lo) #define DENTRY_CHUNK_SIZE_BITS(de) ((de)->mapping.chunk_size_bits) void pcs_mapset_limit(struct pcs_map_set *maps, int limit); diff --git a/fs/fuse/kio/pcs/pcs_cluster.c b/fs/fuse/kio/pcs/pcs_cluster.c index 5df263f01f98..a73120c97e5e 100644 --- a/fs/fuse/kio/pcs/pcs_cluster.c +++ b/fs/fuse/kio/pcs/pcs_cluster.c @@ -364,9 +364,9 @@ static noinline void __pcs_cc_process_ireq_rw(struct pcs_int_request *ireq) unsigned int len; u64 rpos, chunk, end_pos; - rpos = map_file_to_chunk(pos, di->fileinfo.sys.chunk_size, di->fileinfo.sys.stripe_depth, di->fileinfo.sys.strip_width); + rpos = map_file_to_chunk(pos, di->fileinfo.sys.chunk_size_lo, di->fileinfo.sys.stripe_depth, 
di->fileinfo.sys.strip_width); - chunk = rpos & ~((u64)di->fileinfo.sys.chunk_size - 1); + chunk = rpos & ~((u64)di->fileinfo.sys.chunk_size_lo - 1); end_pos = ((rpos / di->fileinfo.sys.strip_width) + 1) * (u64)di->fileinfo.sys.strip_width; sreq = ireq_alloc(di); @@ -385,9 +385,9 @@ static noinline void __pcs_cc_process_ireq_rw(struct pcs_int_request *ireq) sreq->iochunk.cmd = ireq->apireq.req->type; sreq->iochunk.cs_index = 0; sreq->iochunk.chunk = chunk; - sreq->iochunk.offset = rpos % di->fileinfo.sys.chunk_size; + sreq->iochunk.offset = rpos % di->fileinfo.sys.chunk_size_lo; sreq->iochunk.dio_offset = dio_offset; - len = di->fileinfo.sys.chunk_size - sreq->iochunk.offset; + len = di->fileinfo.sys.chunk_size_lo - sreq->iochunk.offset; if (len > sz) len = sz; if (rpos + len > end_pos) diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c index 875ad18df4a7..6d12b8038e8a 100644 --- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c +++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c @@ -343,7 +343,7 @@ static int kpcs_do_file_open(struct fuse_conn *fc, struct file *file, struct ino INIT_LIST_HEAD(>kq); spin_lock_init(>kq_lock); TRACE("init id:%llu chunk_size:%d stripe_depth:%d strip_width:%d\n", - fi->nodeid, di->fileinfo.sys.chunk_size, + fi->nodeid, di->fileinfo.sys.chunk_size_lo, di->fileinfo.sys.stripe_depth, di->fileinfo.sys.strip_width); ret = fuse_pcs_kdirect_claim_op(fc, file, true); diff --git a/fs/fuse/kio/pcs/pcs_prot_types.h b/fs/fuse/kio/pcs/pcs_prot_types.h index bba1f1dc76e3..0e7bc9743885 100644 --- a/fs/fuse/kio/pcs/pcs_prot_types.h +++ b/fs/fuse/kio/pcs/pcs_prot_types.h @@ -104,15 +104,17 @@ struct __pre_aligned(8) pcs_mds_fattr }; struct __pre_aligned(8) pcs_mds_sys_info { - u32 map_type; /* reserved for RAID */ - u32 chunk_size; /* global constant */ + u8 map_type; /* reserved for RAID */ + u8 reserved[2]; + u8 chunk_size_hi; /* chunk size (hi bits) */ + u32 chunk_size_lo; /* chunk size (lo bits) */ u8 stripe_depth; /* for RAID6/RS */ u8 
redundancy; /* number of checksums for RAID6/RS */ u8 tolerance;/* write-tolerance (how much lost replicas we can tolerate still allowing writing) */ - u8 reserved8; + u8 reserved2; u32 strip_width; /* length of strip for RAID6/RS */ u32 lease_tout; /* lease expiration timeout (in milliseconds) */ -
[Devel] [PATCH 1/3] Revert "fs/fuse kio: add warning about jumbo chunks"
This reverts commit afb8d534110dbe203e4ae6385cef79b38c9e4771. Signed-off-by: Pavel Butsykin --- fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 3 --- fs/fuse/kio/pcs/pcs_mds_prot.h | 2 -- 2 files changed, 5 deletions(-) diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c index 104fab25692b..875ad18df4a7 100644 --- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c +++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c @@ -318,9 +318,6 @@ static int kpcs_do_file_open(struct fuse_conn *fc, struct file *file, struct ino if (info.sys.map_type != PCS_MAP_PLAIN) { TRACE("Unsupported map_type:%x, ignore\n", info.sys.map_type); - - if (info.sys.map_type & PCS_JUMBO_CHUNK_FLAG) - pr_warn_once("kio: fpath doesn't support jumbo chunks\n"); return 0; } diff --git a/fs/fuse/kio/pcs/pcs_mds_prot.h b/fs/fuse/kio/pcs/pcs_mds_prot.h index 4aca1e970170..80c20fde1537 100644 --- a/fs/fuse/kio/pcs/pcs_mds_prot.h +++ b/fs/fuse/kio/pcs/pcs_mds_prot.h @@ -259,8 +259,6 @@ enum PCS_MAP_LS = PCS_MAP_COMBINED, /* Log structured storage */ }; -#define PCS_JUMBO_CHUNK_FLAG (1ULL << 63) /* Chunks size > 4G */ - /* Max inline file size */ #define PCS_MAX_INLINE_SIZE 0x10 /* 1Mb */ -- 2.15.1 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 3/3] fs/fuse kio: add warning about jumbo chunks
KIO doesn't support jumbo chunks yet, so all requests to jumbo chunks are silently redirected to user-space. It will be useful to see a message about this until support has been added to KIO. Signed-off-by: Pavel Butsykin --- fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c index 6d12b8038e8a..aa1c79f440cb 100644 --- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c +++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c @@ -318,6 +318,9 @@ static int kpcs_do_file_open(struct fuse_conn *fc, struct file *file, struct ino if (info.sys.map_type != PCS_MAP_PLAIN) { TRACE("Unsupported map_type:%x, ignore\n", info.sys.map_type); + + if (info.sys.chunk_size_hi) + pr_warn_once("kio: fpath doesn't support jumbo chunks\n"); return 0; } -- 2.15.1 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH] fs/fuse kio: fix getting a non-existent pcs inode in kpcs_kill_requests()
Some files may not have pcs inode, so it will be safer to check for !fi->private before using it. Signed-off-by: Pavel Butsykin --- fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c index 875ad18df4a7..a8c235244506 100644 --- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c +++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c @@ -1506,11 +1506,16 @@ static void kpcs_kill_requests(struct fuse_conn *fc, struct inode *inode) list_for_each_entry(ff, >conn_files, fl) { struct pcs_dentry_info *di; + struct fuse_inode *fi; if (!ff->ff_dentry) continue; - di = get_pcs_inode(ff->ff_dentry->d_inode); + fi = get_fuse_inode(ff->ff_dentry->d_inode); + if (!fi->private) + continue; + + di = pcs_inode_from_fuse(fi); spin_lock(>kq_lock); fuse_kill_requests(fc, inode, >kq); -- 2.15.1 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RH7] ve: don't hide cgroup mount root from pseudosuper
Will need it to check that mountinfo persists at the late restore, where the root task is inside ve. https://jira.sw.ru/browse/PSBM-86686 Signed-off-by: Pavel Tikhomirov --- kernel/cgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d35e2ac150a3..9ca8af9f73d9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1862,7 +1862,9 @@ EXPORT_SYMBOL_GPL(cgroup_path); int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen) { - return __cgroup_path(cgrp, buf, buflen, !ve_is_super(get_exec_env())); + struct ve_struct *ve = get_exec_env(); + + return __cgroup_path(cgrp, buf, buflen, !ve_is_super(ve) && !ve->is_pseudosuper); } /* -- 2.20.1 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RHEL7 COMMIT] fs/fuse kio: fix double free of pfc in process_pcs_init_reply()
The commit is pushed to "branch-rh7-3.10.0-957.10.1.vz7.85.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-957.10.1.vz7.85.3 --> commit 57a6200cbb664031506df3b2828f34b469e8d333 Author: Pavel Butsykin Date: Tue Mar 26 12:49:03 2019 +0300 fs/fuse kio: fix double free of pfc in process_pcs_init_reply() Remove redundant kvfree(pfc) in process_pcs_init_reply() to fix it. This branch can only be taken if an abortion occurs during the mount, that is quite a rare event. https://pmc.acronis.com/browse/VSTOR-20922 Signed-off-by: Pavel Butsykin Reviewed-by: Kirill Tkhai --- fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c index b908337f0e11..035f58302be3 100644 --- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c +++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c @@ -127,7 +127,6 @@ static void process_pcs_init_reply(struct fuse_conn *fc, struct fuse_req *req) if (pfc) { fuse_ktrace_remove(fc); pcs_cluster_fini(pfc); - kvfree(pfc); } out: if (fc->conn_error) ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH RHEL7 COMMIT] Revert "ploop: Remove excess variable"
The commit is pushed to "branch-rh7-3.10.0-957.10.1.vz7.85.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-957.10.1.vz7.85.3 --> commit 855b630cf3d5afe7c96c230b1958d0c02ffd6ae6 Author: Kirill Tkhai Date: Tue Mar 26 12:36:59 2019 +0300 Revert "ploop: Remove excess variable" saved_ioc may be NULL. Then we increment the counter and never decrement it. It even may overflow later. https://jira.sw.ru/browse/PSBM-93006 Signed-off-by: Kirill Tkhai --- drivers/block/ploop/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 1d6ee2329bb9..f8b9b0bcba78 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -2565,6 +2565,7 @@ static void ploop_req_state_process(struct ploop_request * preq) struct ploop_device * plo = preq->plo; struct ploop_delta * top_delta; struct io_context * saved_ioc = NULL; + int release_ioc = 0; #ifdef CONFIG_BEANCOUNTERS struct user_beancounter *saved_ub = NULL; #endif @@ -2575,6 +2576,7 @@ static void ploop_req_state_process(struct ploop_request * preq) saved_ioc = current->io_context; current->io_context = preq->ioc; atomic_long_inc(&preq->ioc->refcount); + release_ioc = 1; } #ifdef CONFIG_BEANCOUNTERS WARN_ONCE(!preq->preq_ub, @@ -2906,7 +2908,7 @@ static void ploop_req_state_process(struct ploop_request * preq) BUG(); } out: - if (saved_ioc) { + if (release_ioc) { struct io_context * ioc = current->io_context; current->io_context = saved_ioc; put_io_context(ioc); ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel