________________________________
From: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
Sent: Thursday, September 18, 2025 8:34 AM
To: Vasileios Almpanis <vasileios.almpa...@virtuozzo.com>; Andrey Zhadchenko 
<andrey.zhadche...@virtuozzo.com>; Konstantin Khorenko <khore...@virtuozzo.com>
Cc: devel@openvz.org <devel@openvz.org>
Subject: Re: [Devel] [PATCH vz9/vz10] dm-ploop: fix concurrent COW operations 
on same cluster



On 9/16/25 22:02, Vasileios Almpanis wrote:
> When multiple threads attempt operations on the same cluster simultaneously,
> the current code logs warnings but continues with potentially conflicting
> operations, leading to WARN_ON triggers.
>
> Fix by implementing proper deferral when cluster locks fail:
>
> 1. Change ploop_link_pio() to add identical pios to the lock holder's
>     endio list instead of issuing warnings and continuing
> 2. Rename ploop_add_cluster_lk() to ploop_try_add_cluster_lk() to reflect
>     the new behavior
> 3. Update COW and discard paths to abort operations when cluster locking
>     fails, allowing proper deferral.
>
> This ensures that conflicting operations are serialized rather than
> corrupting shared data structures.
>
> Observed during snapshot creation where filesystem metadata updates
> trigger concurrent operations on the same clusters.
>
> https://virtuozzo.atlassian.net/browse/VSTOR-115446
> Signed-off-by: Vasileios Almpanis <vasileios.almpa...@virtuozzo.com>
>
> Feature: dm-ploop: ploop target driver
> ---
>   drivers/md/dm-ploop-map.c | 33 +++++++++++++++++++--------------
>   1 file changed, 19 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> index 56481d0f8323..db89a668e766 100644
> --- a/drivers/md/dm-ploop-map.c
> +++ b/drivers/md/dm-ploop-map.c
> @@ -535,10 +535,10 @@ static int ploop_link_pio(struct hlist_head head[], 
> struct pio *pio,
>                if (pe == pio)
>                        return 1;
>
> -             WARN_ON_ONCE(pe != NULL);
> -
> -             if (pe)
> -                     PL_ERR("clu:%u already exclusively locked\n", clu);
> +             if (pe) {
> +                     ploop_add_endio_pio(pe, pio);
> +                     return 0;
> +             }
>        }
>
>        if (!hlist_unhashed_lockless(&pio->hlist_node)) {
> @@ -569,7 +569,7 @@ static int ploop_unlink_pio(struct ploop *ploop, struct 
> pio *pio,
>        return 1;
>   }
>
> -static int ploop_add_cluster_lk(struct ploop *ploop, struct pio *pio, u32 
> clu)
> +static int ploop_try_add_cluster_lk(struct ploop *ploop, struct pio *pio, 
> u32 clu)
>   {
>        unsigned long flags;
>        int ret;
> @@ -741,6 +741,10 @@ static int ploop_handle_discard_pio(struct ploop *ploop, 
> struct pio *pio,
>        if (ploop->nr_deltas != 1)
>                goto punch_hole;
>
> +     if (!ploop_try_add_cluster_lk(ploop, pio, clu)) {
> +             return 1;
> +     }

Please remove excess {} brackets.

 > Do not unnecessarily use braces where a single statement will do.

https://www.kernel.org/doc/html/v4.10/process/coding-style.html#placing-braces-and-spaces

> +
>        spin_lock_irqsave(&ploop->inflight_lock, flags);
>        inflight_h = ploop_find_inflight_bio(ploop, clu);
>        if (inflight_h)
> @@ -754,8 +758,6 @@ static int ploop_handle_discard_pio(struct ploop *ploop, 
> struct pio *pio,
>                return 0;
>        }
>
> -     if (!ploop_add_cluster_lk(ploop, pio, clu))
> -             PL_ERR("dis clu %d already locked\n", clu);

Why do we move it above inflight-delay code? Is there a specific reason?
Except that code looks good.

You are right, we should leave it as is. There is no particular reason for 
moving it.

>        pio->wants_discard_index_cleanup = true;
>
>   punch_hole:
> @@ -1585,12 +1587,18 @@ static int ploop_submit_cluster_cow(struct ploop 
> *ploop, unsigned int level,
>   {
>        struct ploop_cow *cow = NULL;
>        struct pio *aux_pio = NULL;
> +     int ret = 0;
>
>        /* Prepare new delta read */
>        aux_pio = ploop_alloc_pio_with_pages(ploop);
>        cow = kmem_cache_alloc(cow_cache, GFP_ATOMIC);
> -     if (!aux_pio || !cow)
> -             goto err;
> +     if (!aux_pio || !cow) {
> +             ret = -ENOMEM;
> +             goto cleanup;
> +     }

Please add newline here to separate allocation checks from cluster
locking to improve code readability.

> +     if (!ploop_try_add_cluster_lk(ploop, cow_pio, clu))
> +             goto cleanup;
> +
>        ploop_init_pio(ploop, REQ_OP_READ, aux_pio);
>        ploop_pio_prepare_offsets(ploop, aux_pio, clu);
>        aux_pio->css = cow_pio->css;
> @@ -1602,17 +1610,14 @@ static int ploop_submit_cluster_cow(struct ploop 
> *ploop, unsigned int level,
>        cow->aux_pio = aux_pio;
>        cow->cow_pio = cow_pio;
>
> -     if (!ploop_add_cluster_lk(ploop, cow_pio, clu))
> -             PL_ERR("cowclu %d already locked\n", clu);
> -
>        /* Stage #0: read secondary delta full clu */
>        ploop_map_and_submit_rw(ploop, dst_clu, aux_pio, level);
>        return 0;
> -err:
> +cleanup:
>        if (aux_pio)
>                ploop_free_pio_with_pages(ploop, aux_pio);
>        kfree(cow);
> -     return -ENOMEM;
> +     return ret;
>   }
>
>   static int ploop_initiate_cluster_cow(struct ploop *ploop, unsigned int 
> level,

--
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to