On 11/11/19 3:59 PM, Mikulas Patocka wrote:
> Snapshot doesn't work with realtime kernels since the commit f79ae415b64c.
> hlist_bl is implemented as a raw spinlock and the code takes two non-raw
> spinlocks while holding hlist_bl (non-raw spinlocks are blocking mutexes
> in the realtime kernel, so they couldn't be taken inside a raw spinlock).
> 
> This patch fixes the problem by using non-raw spinlock
> exception_table_lock instead of the hlist_bl lock.
> 
> Signed-off-by: Mikulas Patocka <[email protected]>
> Fixes: f79ae415b64c ("dm snapshot: Make exception tables scalable")
> 

Hi Mikulas,

I wasn't aware that hlist_bl is implemented as a raw spinlock in the
real time kernel. I would expect it to be a standard non-raw spinlock,
so everything works as expected. But, after digging further in the real
time tree, I found commit ad7675b15fd87f1 ("list_bl: Make list head
locking RT safe") which suggests that such a conversion would break
other parts of the kernel.

That said,

  Reviewed-by: Nikos Tsironis <[email protected]>

> ---
>  drivers/md/dm-snap.c |   65 
> ++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 42 insertions(+), 23 deletions(-)
> 
> Index: linux-2.6/drivers/md/dm-snap.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-snap.c       2019-11-08 15:51:42.000000000 
> +0100
> +++ linux-2.6/drivers/md/dm-snap.c    2019-11-08 15:54:58.000000000 +0100
> @@ -141,6 +141,10 @@ struct dm_snapshot {
>        * for them to be committed.
>        */
>       struct bio_list bios_queued_during_merge;
> +
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +     spinlock_t exception_table_lock;
> +#endif
>  };
>  
>  /*
> @@ -625,30 +629,42 @@ static uint32_t exception_hash(struct dm
>  
>  /* Lock to protect access to the completed and pending exception hash 
> tables. */
>  struct dm_exception_table_lock {
> +#ifndef CONFIG_PREEMPT_RT_BASE
>       struct hlist_bl_head *complete_slot;
>       struct hlist_bl_head *pending_slot;
> +#endif
>  };
>  
>  static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t 
> chunk,
>                                        struct dm_exception_table_lock *lock)
>  {
> +#ifndef CONFIG_PREEMPT_RT_BASE
>       struct dm_exception_table *complete = &s->complete;
>       struct dm_exception_table *pending = &s->pending;
>  
>       lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
>       lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
> +#endif
>  }
>  
> -static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
> +static void dm_exception_table_lock(struct dm_snapshot *s, struct 
> dm_exception_table_lock *lock)
>  {
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +     spin_lock(&s->exception_table_lock);
> +#else
>       hlist_bl_lock(lock->complete_slot);
>       hlist_bl_lock(lock->pending_slot);
> +#endif
>  }
>  
> -static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
> +static void dm_exception_table_unlock(struct dm_snapshot *s, struct 
> dm_exception_table_lock *lock)
>  {
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +     spin_unlock(&s->exception_table_lock);
> +#else
>       hlist_bl_unlock(lock->pending_slot);
>       hlist_bl_unlock(lock->complete_slot);
> +#endif
>  }
>  
>  static int dm_exception_table_init(struct dm_exception_table *et,
> @@ -835,9 +851,9 @@ static int dm_add_exception(void *contex
>        */
>       dm_exception_table_lock_init(s, old, &lock);
>  
> -     dm_exception_table_lock(&lock);
> +     dm_exception_table_lock(s, &lock);
>       dm_insert_exception(&s->complete, e);
> -     dm_exception_table_unlock(&lock);
> +     dm_exception_table_unlock(s, &lock);
>  
>       return 0;
>  }
> @@ -1318,6 +1334,9 @@ static int snapshot_ctr(struct dm_target
>       s->first_merging_chunk = 0;
>       s->num_merging_chunks = 0;
>       bio_list_init(&s->bios_queued_during_merge);
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +     spin_lock_init(&s->exception_table_lock);
> +#endif
>  
>       /* Allocate hash table for COW data */
>       if (init_hash_tables(s)) {
> @@ -1651,7 +1670,7 @@ static void pending_complete(void *conte
>               invalidate_snapshot(s, -EIO);
>               error = 1;
>  
> -             dm_exception_table_lock(&lock);
> +             dm_exception_table_lock(s, &lock);
>               goto out;
>       }
>  
> @@ -1660,13 +1679,13 @@ static void pending_complete(void *conte
>               invalidate_snapshot(s, -ENOMEM);
>               error = 1;
>  
> -             dm_exception_table_lock(&lock);
> +             dm_exception_table_lock(s, &lock);
>               goto out;
>       }
>       *e = pe->e;
>  
>       down_read(&s->lock);
> -     dm_exception_table_lock(&lock);
> +     dm_exception_table_lock(s, &lock);
>       if (!s->valid) {
>               up_read(&s->lock);
>               free_completed_exception(e);
> @@ -1687,16 +1706,16 @@ static void pending_complete(void *conte
>  
>       /* Wait for conflicting reads to drain */
>       if (__chunk_is_tracked(s, pe->e.old_chunk)) {
> -             dm_exception_table_unlock(&lock);
> +             dm_exception_table_unlock(s, &lock);
>               __check_for_conflicting_io(s, pe->e.old_chunk);
> -             dm_exception_table_lock(&lock);
> +             dm_exception_table_lock(s, &lock);
>       }
>  
>  out:
>       /* Remove the in-flight exception from the list */
>       dm_remove_exception(&pe->e);
>  
> -     dm_exception_table_unlock(&lock);
> +     dm_exception_table_unlock(s, &lock);
>  
>       snapshot_bios = bio_list_get(&pe->snapshot_bios);
>       origin_bios = bio_list_get(&pe->origin_bios);
> @@ -1968,7 +1987,7 @@ static int snapshot_map(struct dm_target
>       }
>  
>       down_read(&s->lock);
> -     dm_exception_table_lock(&lock);
> +     dm_exception_table_lock(s, &lock);
>  
>       if (!s->valid || (unlikely(s->snapshot_overflowed) &&
>           bio_data_dir(bio) == WRITE)) {
> @@ -1997,7 +2016,7 @@ static int snapshot_map(struct dm_target
>               remap_exception(s, e, bio, chunk);
>               if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
>                   io_overlaps_chunk(s, bio)) {
> -                     dm_exception_table_unlock(&lock);
> +                     dm_exception_table_unlock(s, &lock);
>                       up_read(&s->lock);
>                       zero_exception(s, e, bio, chunk);
>                       r = DM_MAPIO_SUBMITTED; /* discard is not issued */
> @@ -2024,9 +2043,9 @@ static int snapshot_map(struct dm_target
>       if (bio_data_dir(bio) == WRITE) {
>               pe = __lookup_pending_exception(s, chunk);
>               if (!pe) {
> -                     dm_exception_table_unlock(&lock);
> +                     dm_exception_table_unlock(s, &lock);
>                       pe = alloc_pending_exception(s);
> -                     dm_exception_table_lock(&lock);
> +                     dm_exception_table_lock(s, &lock);
>  
>                       e = dm_lookup_exception(&s->complete, chunk);
>                       if (e) {
> @@ -2037,7 +2056,7 @@ static int snapshot_map(struct dm_target
>  
>                       pe = __find_pending_exception(s, pe, chunk);
>                       if (!pe) {
> -                             dm_exception_table_unlock(&lock);
> +                             dm_exception_table_unlock(s, &lock);
>                               up_read(&s->lock);
>  
>                               down_write(&s->lock);
> @@ -2063,7 +2082,7 @@ static int snapshot_map(struct dm_target
>               if (!pe->started && io_overlaps_chunk(s, bio)) {
>                       pe->started = 1;
>  
> -                     dm_exception_table_unlock(&lock);
> +                     dm_exception_table_unlock(s, &lock);
>                       up_read(&s->lock);
>  
>                       start_full_bio(pe, bio);
> @@ -2076,7 +2095,7 @@ static int snapshot_map(struct dm_target
>                       /* this is protected by the exception table lock */
>                       pe->started = 1;
>  
> -                     dm_exception_table_unlock(&lock);
> +                     dm_exception_table_unlock(s, &lock);
>                       up_read(&s->lock);
>  
>                       start_copy(pe);
> @@ -2088,7 +2107,7 @@ static int snapshot_map(struct dm_target
>       }
>  
>  out_unlock:
> -     dm_exception_table_unlock(&lock);
> +     dm_exception_table_unlock(s, &lock);
>       up_read(&s->lock);
>  out:
>       return r;
> @@ -2449,7 +2468,7 @@ static int __origin_write(struct list_he
>               dm_exception_table_lock_init(snap, chunk, &lock);
>  
>               down_read(&snap->lock);
> -             dm_exception_table_lock(&lock);
> +             dm_exception_table_lock(snap, &lock);
>  
>               /* Only deal with valid and active snapshots */
>               if (!snap->valid || !snap->active)
> @@ -2466,9 +2485,9 @@ static int __origin_write(struct list_he
>                       if (e)
>                               goto next_snapshot;
>  
> -                     dm_exception_table_unlock(&lock);
> +                     dm_exception_table_unlock(snap, &lock);
>                       pe = alloc_pending_exception(snap);
> -                     dm_exception_table_lock(&lock);
> +                     dm_exception_table_lock(snap, &lock);
>  
>                       pe2 = __lookup_pending_exception(snap, chunk);
>  
> @@ -2481,7 +2500,7 @@ static int __origin_write(struct list_he
>  
>                               pe = __insert_pending_exception(snap, pe, 
> chunk);
>                               if (!pe) {
> -                                     dm_exception_table_unlock(&lock);
> +                                     dm_exception_table_unlock(snap, &lock);
>                                       up_read(&snap->lock);
>  
>                                       invalidate_snapshot(snap, -ENOMEM);
> @@ -2516,7 +2535,7 @@ static int __origin_write(struct list_he
>               }
>  
>  next_snapshot:
> -             dm_exception_table_unlock(&lock);
> +             dm_exception_table_unlock(snap, &lock);
>               up_read(&snap->lock);
>  
>               if (pe_to_start_now) {
> 

--
dm-devel mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/dm-devel

Reply via email to