On 9/16/25 22:02, Vasileios Almpanis wrote:
When multiple threads attempt operations on the same cluster simultaneously,
the current code logs warnings but continues with potentially conflicting
operations, leading to WARN_ON triggers.

Fix by implementing proper deferral when cluster locks fail:

1. Change ploop_link_pio() to add identical pios to the lock holder's
    endio list instead of issuing warnings and continuing
2. Rename ploop_add_cluster_lk() to ploop_try_add_cluster_lk() to reflect
    the new behavior
3. Update COW and discard paths to abort operations when cluster locking
    fails, allowing proper deferral.

This ensures that conflicting operations are serialized rather than
corrupting shared data structures.

Observed during snapshot creation where filesystem metadata updates
trigger concurrent operations on the same clusters.

https://virtuozzo.atlassian.net/browse/VSTOR-115446
Signed-off-by: Vasileios Almpanis <vasileios.almpa...@virtuozzo.com>

Feature: dm-ploop: ploop target driver
---
  drivers/md/dm-ploop-map.c | 33 +++++++++++++++++++--------------
  1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 56481d0f8323..db89a668e766 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -535,10 +535,10 @@ static int ploop_link_pio(struct hlist_head head[], 
struct pio *pio,
                if (pe == pio)
                        return 1;
- WARN_ON_ONCE(pe != NULL);
-
-               if (pe)
-                       PL_ERR("clu:%u already exclusively locked\n", clu);
+               if (pe) {
+                       ploop_add_endio_pio(pe, pio);
+                       return 0;
+               }
        }
if (!hlist_unhashed_lockless(&pio->hlist_node)) {
@@ -569,7 +569,7 @@ static int ploop_unlink_pio(struct ploop *ploop, struct pio 
*pio,
        return 1;
  }
-static int ploop_add_cluster_lk(struct ploop *ploop, struct pio *pio, u32 clu)
+static int ploop_try_add_cluster_lk(struct ploop *ploop, struct pio *pio, u32 
clu)
  {
        unsigned long flags;
        int ret;
@@ -741,6 +741,10 @@ static int ploop_handle_discard_pio(struct ploop *ploop, 
struct pio *pio,
        if (ploop->nr_deltas != 1)
                goto punch_hole;
+ if (!ploop_try_add_cluster_lk(ploop, pio, clu)) {
+               return 1;
+       }

Please remove excess {} brackets.

> Do not unnecessarily use braces where a single statement will do.

https://www.kernel.org/doc/html/v4.10/process/coding-style.html#placing-braces-and-spaces

+
        spin_lock_irqsave(&ploop->inflight_lock, flags);
        inflight_h = ploop_find_inflight_bio(ploop, clu);
        if (inflight_h)
@@ -754,8 +758,6 @@ static int ploop_handle_discard_pio(struct ploop *ploop, 
struct pio *pio,
                return 0;
        }
- if (!ploop_add_cluster_lk(ploop, pio, clu))
-               PL_ERR("dis clu %d already locked\n", clu);

Why do we move it above inflight-delay code? Is there a specific reason? Except that code looks good.

        pio->wants_discard_index_cleanup = true;
punch_hole:
@@ -1585,12 +1587,18 @@ static int ploop_submit_cluster_cow(struct ploop 
*ploop, unsigned int level,
  {
        struct ploop_cow *cow = NULL;
        struct pio *aux_pio = NULL;
+       int ret = 0;
/* Prepare new delta read */
        aux_pio = ploop_alloc_pio_with_pages(ploop);
        cow = kmem_cache_alloc(cow_cache, GFP_ATOMIC);
-       if (!aux_pio || !cow)
-               goto err;
+       if (!aux_pio || !cow) {
+               ret = -ENOMEM;
+               goto cleanup;
+       }

Please add newline here to separate allocation checks from cluster locking to improve code readability.

+       if (!ploop_try_add_cluster_lk(ploop, cow_pio, clu))
+               goto cleanup;
+
        ploop_init_pio(ploop, REQ_OP_READ, aux_pio);
        ploop_pio_prepare_offsets(ploop, aux_pio, clu);
        aux_pio->css = cow_pio->css;
@@ -1602,17 +1610,14 @@ static int ploop_submit_cluster_cow(struct ploop 
*ploop, unsigned int level,
        cow->aux_pio = aux_pio;
        cow->cow_pio = cow_pio;
- if (!ploop_add_cluster_lk(ploop, cow_pio, clu))
-               PL_ERR("cowclu %d already locked\n", clu);
-
        /* Stage #0: read secondary delta full clu */
        ploop_map_and_submit_rw(ploop, dst_clu, aux_pio, level);
        return 0;
-err:
+cleanup:
        if (aux_pio)
                ploop_free_pio_with_pages(ploop, aux_pio);
        kfree(cow);
-       return -ENOMEM;
+       return ret;
  }
static int ploop_initiate_cluster_cow(struct ploop *ploop, unsigned int level,

--
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to