When multiple threads attempt operations on the same cluster simultaneously, the current code logs warnings but continues with potentially conflicting operations, leading to WARN_ON triggers.
Fix by implementing proper deferral when cluster locks fail: 1. Change ploop_link_pio() to add identical pios to the lock holder's endio list instead of issuing warnings and continuing 2. Rename ploop_add_cluster_lk() to ploop_try_add_cluster_lk() to reflect the new behavior 3. Update COW and discard paths to abort operations when cluster locking fails, allowing proper deferral. This ensures that conflicting operations are serialized rather than corrupting shared data structures. Observed during snapshot creation where filesystem metadata updates trigger concurrent operations on the same clusters. https://virtuozzo.atlassian.net/browse/VSTOR-115446 Signed-off-by: Vasileios Almpanis <vasileios.almpa...@virtuozzo.com> Feature: dm-ploop: ploop target driver --- drivers/md/dm-ploop-map.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c index 56481d0f8323..db89a668e766 100644 --- a/drivers/md/dm-ploop-map.c +++ b/drivers/md/dm-ploop-map.c @@ -535,10 +535,10 @@ static int ploop_link_pio(struct hlist_head head[], struct pio *pio, if (pe == pio) return 1; - WARN_ON_ONCE(pe != NULL); - - if (pe) - PL_ERR("clu:%u already exclusively locked\n", clu); + if (pe) { + ploop_add_endio_pio(pe, pio); + return 0; + } } if (!hlist_unhashed_lockless(&pio->hlist_node)) { @@ -569,7 +569,7 @@ static int ploop_unlink_pio(struct ploop *ploop, struct pio *pio, return 1; } -static int ploop_add_cluster_lk(struct ploop *ploop, struct pio *pio, u32 clu) +static int ploop_try_add_cluster_lk(struct ploop *ploop, struct pio *pio, u32 clu) { unsigned long flags; int ret; @@ -741,6 +741,10 @@ static int ploop_handle_discard_pio(struct ploop *ploop, struct pio *pio, if (ploop->nr_deltas != 1) goto punch_hole; + if (!ploop_try_add_cluster_lk(ploop, pio, clu)) { + return 1; + } + spin_lock_irqsave(&ploop->inflight_lock, flags); inflight_h = ploop_find_inflight_bio(ploop, clu); if (inflight_h) @@ -754,8 +758,6 @@ static int ploop_handle_discard_pio(struct ploop *ploop, struct pio *pio, return 0; } - if (!ploop_add_cluster_lk(ploop, pio, clu)) - PL_ERR("dis clu %d already locked\n", clu); pio->wants_discard_index_cleanup = true; punch_hole: @@ -1585,12 +1587,18 @@ static int ploop_submit_cluster_cow(struct ploop *ploop, unsigned int level, { struct ploop_cow *cow = NULL; struct pio *aux_pio = NULL; + int ret = 0; /* Prepare new delta read */ aux_pio = ploop_alloc_pio_with_pages(ploop); cow = kmem_cache_alloc(cow_cache, GFP_ATOMIC); - if (!aux_pio || !cow) - goto err; + if (!aux_pio || !cow) { + ret = -ENOMEM; + goto cleanup; + } + if (!ploop_try_add_cluster_lk(ploop, cow_pio, clu)) + goto cleanup; + ploop_init_pio(ploop, REQ_OP_READ, aux_pio); ploop_pio_prepare_offsets(ploop, aux_pio, clu); aux_pio->css = cow_pio->css; @@ -1602,17 +1610,14 @@ static int ploop_submit_cluster_cow(struct ploop *ploop, unsigned int level, cow->aux_pio = aux_pio; cow->cow_pio = cow_pio; - if (!ploop_add_cluster_lk(ploop, cow_pio, clu)) - PL_ERR("cowclu %d already locked\n", clu); - /* Stage #0: read secondary delta full clu */ ploop_map_and_submit_rw(ploop, dst_clu, aux_pio, level); return 0; -err: +cleanup: if (aux_pio) ploop_free_pio_with_pages(ploop, aux_pio); kfree(cow); - return -ENOMEM; + return ret; } static int ploop_initiate_cluster_cow(struct ploop *ploop, unsigned int level, -- 2.43.0 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel