On Fri, 25 Feb 2022 16:32:39 +0800 Hillf Danton wrote:
> On Fri, 25 Feb 2022 09:47:16 +0800 Hillf Danton wrote:
> > On Thu, 24 Feb 2022 16:47:30 +0800 Hillf Danton wrote:
> > > On Sat,  8 Jan 2022 08:46:17 -0800
> > > > From: Tim Murray <timmur...@google.com>
> > > > 
> > > > f2fs rw_semaphores work better if writers can starve readers,
> > > > especially for the checkpoint thread, because writers are strictly
> > > > more important than reader threads. This prevents significant priority
> > > > inversion between low-priority readers that blocked while trying to
> > > > acquire the read lock and a second acquisition of the write lock that
> > > > might be blocking high priority work.
> > > > 
> > > > Signed-off-by: Tim Murray <timmur...@google.com>
> > > > Signed-off-by: Jaegeuk Kim <jaeg...@kernel.org>
> > > > ---
> > > 
> > > ...
> > > 
> > > > +/*
> > > > + * An implementation of an rwsem that is explicitly unfair to readers. This
> > > > + * prevents priority inversion when a low-priority reader acquires the read lock
> > > > + * while sleeping on the write lock but the write lock is needed by
> > > > + * higher-priority clients.
> > > > + */
> > > > +
> > > > +struct f2fs_rwsem {
> > > > +        struct rw_semaphore internal_rwsem;
> > > > +        wait_queue_head_t read_waiters;
> > > > +};
> > > 
> > > ...
> > > 
> > > > +static inline void f2fs_down_read(struct f2fs_rwsem *sem)
> > > > +{
> > > > +	wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem));
> > > > +}
> > > > +
> > > > +static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem)
> > > > +{
> > > > +       return down_read_trylock(&sem->internal_rwsem);
> > > > +}
> > > > +
> > > > +static inline void f2fs_up_read(struct f2fs_rwsem *sem)
> > > > +{
> > > > +       up_read(&sem->internal_rwsem);
> > > > +}
> > > > +
> > > > +static inline void f2fs_down_write(struct f2fs_rwsem *sem)
> > > > +{
> > > > +       down_write(&sem->internal_rwsem);
> > > > +}
> > > > +
> > > > +static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem)
> > > > +{
> > > > +       return down_write_trylock(&sem->internal_rwsem);
> > > > +}
> > > > +
> > > > +static inline void f2fs_up_write(struct f2fs_rwsem *sem)
> > > > +{
> > > > +       up_write(&sem->internal_rwsem);
> > > > +       wake_up_all(&sem->read_waiters);
> > > > +}
> > > > +
> > > 
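A side note on the parts of the patch elided above: they presumably also add
an init helper. A minimal sketch of the setup, with init_f2fs_rwsem() only a
guessed name for the elided hunk:

	static inline void init_f2fs_rwsem(struct f2fs_rwsem *sem)
	{
		/* guessed name; the real init hunk is elided above */
		init_rwsem(&sem->internal_rwsem);
		init_waitqueue_head(&sem->read_waiters);
	}

The writer preference then falls out of f2fs_down_read(): a reader that fails
down_read_trylock() never joins the internal rwsem's wait queue, so a writer
only waits for current lock holders, never behind queued readers, and blocked
readers are batch-woken from f2fs_up_write().
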
> > > Here are my two cents: an unfair rwsem derived from lock_sock(), which
> > > has no link to the kernel's rwsem.
> > > 
> > > This is for thoughts only at this point.
> > > 
> > > Hillf
> > > 
> > > struct unfair_rwsem {
> > >   spinlock_t      lock;
> > >   int             owner; /* < 0 writer, > 0 readers */
> > > 
> > >   struct list_head reader, writer; /* read/write waiters */
> > > 
> > > #ifdef CONFIG_DEBUG_LOCK_ALLOC
> > >   struct lockdep_map dep_map;
> > > #endif
> > > };
> > > 
> > > struct unfair_rwsem_wait {
> > >   struct list_head        node;
> > >   struct task_struct      *task;
> > > };
> > > 
> > > static void lock_unfair_rwsem(struct unfair_rwsem *us, int read)
> > > {
> > >   struct unfair_rwsem_wait wait;
> > > 
> > >   mutex_acquire(&us->dep_map, 0, 0, _RET_IP_);
> > >   might_sleep();
> > >   wait.task = current;
> > >   for (;;) {
> > >           spin_lock(&us->lock);
> > >           if (read) {
> > >                   if (us->owner >= 0) {
> > >                           us->owner++;
> > >                           spin_unlock(&us->lock);
> > >                           return;
> > >                   }
> > >                   list_add_tail(&wait.node, &us->reader);
> > >           } else {
> > >                   if (us->owner == 0) {
> > >                           us->owner--;
> > >                           spin_unlock(&us->lock);
> > >                           return;
> > >                   }
> > >                   list_add_tail(&wait.node, &us->writer);
> > >           }
> > >           set_current_state(TASK_UNINTERRUPTIBLE);
> > >           spin_unlock(&us->lock);
> > >           schedule();
> > >   }
> > > }
> > > 
> > > void down_read_unfair_rwsem(struct unfair_rwsem *us)
> > > {
> > >   lock_unfair_rwsem(us, 1);
> > > }
> > > 
> > > void down_write_unfair_rwsem(struct unfair_rwsem *us)
> > > {
> > >   lock_unfair_rwsem(us, 0);
> > > }
> > > 
> > > static void unlock_unfair_rwsem(struct unfair_rwsem *us, int read)
> > > {
> > >   struct list_head *head = NULL;
> > >   int all = 0;
> > > 
> > >   spin_lock(&us->lock);
> > >   if (us->owner < 0) {
> > >           BUG_ON(read);
> > >           us->owner++;
> > >           BUG_ON(0 != us->owner);
> > > 
> > >           if (!list_empty(&us->writer))
> > >                   head = &us->writer;
> > >           else if (!list_empty(&us->reader)) {
> > >                   head = &us->reader;
> > >                   all = 1;
> > >           }
> > >   } else if (us->owner > 0) {
> > >           BUG_ON(!read);
> > >           BUG_ON(!list_empty(&us->reader));
> > >           us->owner--;
> > >           if (us->owner == 0)
> > >                   if (!list_empty(&us->writer))
> > >                           head = &us->writer;
> > >   } else
> > >           BUG_ON(1);
> > > 
> > >   mutex_release(&us->dep_map, _RET_IP_);
> > >   if (head) {
> > >           struct unfair_rwsem_wait *wait;
> > >           do {
> > > 			wait = list_first_entry(head, struct unfair_rwsem_wait, node);
> > >                   list_del(&wait->node);
> > >                   wake_up_process(wait->task);
> > >           } while (all && !list_empty(head));
> > >   }
> > >   spin_unlock(&us->lock);
> > > }
> > > 
> > > void up_write_unfair_rwsem(struct unfair_rwsem *us)
> > > {
> > >   unlock_unfair_rwsem(us, 0);
> > > }
> > > 
> > > void up_read_unfair_rwsem(struct unfair_rwsem *us)
> > > {
> > >   unlock_unfair_rwsem(us, 1);
> > > }
> > > 
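One thing the sketch above does not show is initialization. A hedged sketch,
where unfair_rwsem_init() and the "unfair_rwsem" lockdep class name are
illustrative choices only:

static void unfair_rwsem_init(struct unfair_rwsem *us)
{
	spin_lock_init(&us->lock);
	us->owner = 0;			/* unlocked */
	INIT_LIST_HEAD(&us->reader);
	INIT_LIST_HEAD(&us->writer);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	{
		/* one lock class shared by every rwsem inited here */
		static struct lock_class_key key;

		lockdep_init_map(&us->dep_map, "unfair_rwsem", &key, 0);
	}
#endif
}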
> > 
> > And make the unfair rwsem more unfair by setting the lock owner on behalf
> > of the woken write waiter, in addition to preferring to wake a write
> > waiter over read waiters.
> > 
> > Hillf
> > 
> > --- x/unfair_rwsem.c
> > +++ y/unfair_rwsem.c
> > @@ -42,6 +42,8 @@ static void lock_unfair_rwsem(struct unf
> >             set_current_state(TASK_UNINTERRUPTIBLE);
> >             spin_unlock(&us->lock);
> >             schedule();
> > +           if (!read)
> > +                   return; /* because this is unfair rwsem */
> >     }
> >  }
> >  
> > @@ -88,8 +90,15 @@ static void unlock_unfair_rwsem(struct u
> >             do {
> >                     wait = list_first_entry(head, struct unfair_rwsem_wait, node);
> >                     list_del(&wait->node);
> > -                   wake_up_process(wait->task);
> > -           } while (all && !list_empty(head));
> > +                   if (all)
> > +                           wake_up_process(wait->task);
> > +                   else {
> > +                           /* for the sake of unfairness */
> > +                           us->owner = -1;
> > +                           wake_up_process(wait->task);
> > +                           break;
> > +                   }
> > +           } while (!list_empty(head));
> >     }
> >     spin_unlock(&us->lock);
> >  }
> > 
> 
> Add the trylock methods to the unfair rwsem.
> 
> Hillf
> 
> --- x/unfair_rwsem.c
> +++ y/unfair_rwsem.c
> @@ -113,3 +115,37 @@ void up_read_unfair_rwsem(struct unfair_
>       unlock_unfair_rwsem(us, 1);
>  }
>  
> +static bool trylock_unfair_rwsem(struct unfair_rwsem *us, int read)
> +{
> +     bool rc = false;
> +
> +     if (!spin_trylock(&us->lock))
> +             return false;
> +
> +     if (read) {
> +             if (us->owner >= 0) {
> +                     rc = true;
> +                     us->owner++;
> +             }
> +     } else {
> +             if (us->owner == 0) {
> +                     rc = true;
> +                     us->owner--;
> +             }
> +     }
> +     spin_unlock(&us->lock);
> +
> +     if (rc)
> +             mutex_acquire(&us->dep_map, 0, 1, _RET_IP_);
> +     return rc;
> +}
> +
> +bool try_down_write_unfair_rwsem(struct unfair_rwsem *us)
> +{
> +     return trylock_unfair_rwsem(us, 0);
> +}
> +
> +bool try_down_read_unfair_rwsem(struct unfair_rwsem *us)
> +{
> +     return trylock_unfair_rwsem(us, 1);
> +}

Add two changes.

The major change is to delete the retry loop in the down stage: the up stage
now sets the lock owner on behalf of both read and write waiters before they
are woken, so a woken waiter owns the lock when schedule() returns. The unfair
rwsem is thus a spinlock at heart, without bothering to spin on the owner.

The minor one is to move the wakeup of waiters in the up stage out of the
section protected by the spinlock, in a bid to let new waiters be added in
parallel with the wakeup. A hedged sketch of how the result might be
initialized and used follows the diff.

Hillf

--- x/unfair_rwsem.c
+++ y/unfair_rwsem.c
@@ -17,35 +17,33 @@ struct unfair_rwsem_wait {
 
 static void lock_unfair_rwsem(struct unfair_rwsem *us, int read)
 {
-       struct unfair_rwsem_wait wait;
+       struct unfair_rwsem_wait wait = {
+               .task = current,
+       };
 
-       mutex_acquire(&us->dep_map, 0, 0, _RET_IP_);
        might_sleep();
-       wait.task = current;
-       for (;;) {
-               spin_lock(&us->lock);
-               if (read) {
-                       if (us->owner >= 0) {
-                               us->owner++;
-                               spin_unlock(&us->lock);
-                               return;
-                       }
-                       list_add_tail(&wait.node, &us->reader);
-               } else {
-                       if (us->owner == 0) {
-                               us->owner--;
-                               spin_unlock(&us->lock);
-                               return;
-                       }
-                       list_add_tail(&wait.node, &us->writer);
-               }
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               spin_unlock(&us->lock);
+       mutex_acquire(&us->dep_map, 0, 0, _RET_IP_);
 
-               schedule();
-               if (!read)
-                       return; /* because this is unfair rwsem */
+       spin_lock(&us->lock);
+       if (read) {
+               if (us->owner >= 0) {
+                       us->owner++;
+                       spin_unlock(&us->lock);
+                       return;
+               }
+               list_add_tail(&wait.node, &us->reader);
+       } else {
+               if (us->owner == 0) {
+                       us->owner--;
+                       spin_unlock(&us->lock);
+                       return;
+               }
+               list_add_tail(&wait.node, &us->writer);
        }
+       __set_current_state(TASK_UNINTERRUPTIBLE);
+       spin_unlock(&us->lock);
+
+       schedule();
 }
 
 void down_write_unfair_rwsem(struct unfair_rwsem *us)
@@ -64,11 +62,13 @@ static void unlock_unfair_rwsem(struct u
        int all = 0;
 
        spin_lock(&us->lock);
+
        if (us->owner < 0) {
+               BUG_ON(us->owner != -1);
                BUG_ON(read);
-               us->owner++;
-               BUG_ON(0 != us->owner);
 
+               us->owner = 0;
+               /* prefer to wake up write waiter over read waiter */
                if (!list_empty(&us->writer))
                        head = &us->writer;
                else if (!list_empty(&us->reader)) {
@@ -78,28 +78,38 @@ static void unlock_unfair_rwsem(struct u
        } else if (us->owner > 0) {
                BUG_ON(!read);
                BUG_ON(!list_empty(&us->reader));
+
                us->owner--;
                if (us->owner == 0)
                        if (!list_empty(&us->writer))
                                head = &us->writer;
-       } else
+       } else {
                BUG_ON(1);
-
+       }
        mutex_release(&us->dep_map, _RET_IP_);
        if (head) {
-               struct unfair_rwsem_wait *wait;
-               do {
+               struct unfair_rwsem_wait *wait, *next;
+               LIST_HEAD(__head);
+
+               if (!all) {
+                       us->owner = -1;
                        wait = list_first_entry(head, struct unfair_rwsem_wait, node);
                        list_del(&wait->node);
-                       if (all)
-                               wake_up_process(wait->task);
-                       else {
-                               /* for the sake of unfairness */
-                               us->owner = -1;
-                               wake_up_process(wait->task);
-                               break;
-                       }
-               } while (!list_empty(head));
+                       spin_unlock(&us->lock);
+                       wake_up_process(wait->task);
+                       return;
+               }
+
+               list_for_each_entry(wait, head, node)
+                       us->owner++;
+
+               list_splice_init(head, &__head);
+               spin_unlock(&us->lock);
+               head = &__head;
+
+               list_for_each_entry_safe(wait, next, head, node)
+                       wake_up_process(wait->task);
+               return;
        }
        spin_unlock(&us->lock);
 }
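
Finally, a hedged sketch of intended usage, assuming the unfair_rwsem_init()
helper sketched earlier in this mail; everything except the posted API names
is illustrative:

static struct unfair_rwsem us_lock;

static void writer_side(void)
{
	down_write_unfair_rwsem(&us_lock);
	/* exclusive section */
	up_write_unfair_rwsem(&us_lock);
	/* up_write prefers a queued write waiter; only if none is queued
	 * are all read waiters woken, with owner preset to their count */
}

static void reader_side(void)
{
	if (!try_down_read_unfair_rwsem(&us_lock))
		/* blocks at most once: the up stage hands over ownership
		 * before wake_up_process(), so no retry loop is needed */
		down_read_unfair_rwsem(&us_lock);
	/* shared section */
	up_read_unfair_rwsem(&us_lock);
}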

