Re: svn commit: r367631 - in head/sys: kern sys

2020-11-13 Thread Konstantin Belousov
On Fri, Nov 13, 2020 at 07:30:47PM +0100, Mateusz Guzik wrote:
> On 11/13/20, Konstantin Belousov  wrote:
> > +static u_long vn_lock_pair_pause_cnt;
> > +SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD,
> > +&vn_lock_pair_pause_cnt, 0,
> > +"Count of vn_lock_pair deadlocks");
> > +
> > +static void
> > +vn_lock_pair_pause(const char *wmesg)
> > +{
> > +   atomic_add_long(&vn_lock_pair_pause_cnt, 1);
> > +   pause(wmesg, prng32_bounded(hz / 10));
> > +}
> > +
> > +/*
> > + * Lock pair of vnodes vp1, vp2, avoiding lock order reversal.
> > + * vp1_locked indicates whether vp1 is exclusively locked; if not, vp1
> > + * must be unlocked.  Same for vp2 and vp2_locked.  One of the vnodes
> > + * can be NULL.
> > + *
> > + * The function returns with both vnodes exclusively locked, and
> > + * guarantees that it does not create lock order reversal with other
> > + * threads during its execution.  Both vnodes could be unlocked
> > + * temporary (and reclaimed).
> > + */
> > +void
> > +vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
> > +bool vp2_locked)
> > +{
> > +   int error;
> > +
> > +   if (vp1 == NULL && vp2 == NULL)
> > +   return;
> > +   if (vp1 != NULL) {
> > +   if (vp1_locked)
> > +   ASSERT_VOP_ELOCKED(vp1, "vp1");
> > +   else
> > +   ASSERT_VOP_UNLOCKED(vp1, "vp1");
> > +   } else {
> > +   vp1_locked = true;
> > +   }
> > +   if (vp2 != NULL) {
> > +   if (vp2_locked)
> > +   ASSERT_VOP_ELOCKED(vp2, "vp2");
> > +   else
> > +   ASSERT_VOP_UNLOCKED(vp2, "vp2");
> > +   } else {
> > +   vp2_locked = true;
> > +   }
> > +   if (!vp1_locked && !vp2_locked) {
> > +   vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
> > +   vp1_locked = true;
> > +   }
> > +
> > +   for (;;) {
> > +   if (vp1_locked && vp2_locked)
> > +   break;
> > +   if (vp1_locked && vp2 != NULL) {
> > +   if (vp1 != NULL) {
> > +   error = VOP_LOCK1(vp2, LK_EXCLUSIVE | LK_NOWAIT,
> > +   __FILE__, __LINE__);
> > +   if (error == 0)
> > +   break;
> > +   VOP_UNLOCK(vp1);
> > +   vp1_locked = false;
> > +   vn_lock_pair_pause("vlp1");
> > +   }
> > +   vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY);
> > +   vp2_locked = true;
> > +   }
> > +   if (vp2_locked && vp1 != NULL) {
> > +   if (vp2 != NULL) {
> > +   error = VOP_LOCK1(vp1, LK_EXCLUSIVE | LK_NOWAIT,
> > +   __FILE__, __LINE__);
> > +   if (error == 0)
> > +   break;
> > +   VOP_UNLOCK(vp2);
> > +   vp2_locked = false;
> > +   vn_lock_pair_pause("vlp2");
> > +   }
> > +   vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
> > +   vp1_locked = true;
> > +   }
> > +   }
> > +   if (vp1 != NULL)
> > +   ASSERT_VOP_ELOCKED(vp1, "vp1 ret");
> > +   if (vp2 != NULL)
> > +   ASSERT_VOP_ELOCKED(vp2, "vp2 ret");
> >  }
> >
> 
> Multiple callers who get here with flipped addresses can end up
> failing against each other in an indefinite loop.
> 
> Instead, after initial trylocking fails, the routine should establish
> ordering it will follow for itself, for example by sorting by address.
> 
> Then pseudo-code would be:
> retry:
> vn_lock(lower, ...);
> if (!VOP_LOCK(higher, LK_NOWAIT ...)) {
>  vn_unlock(lower);
>  vn_lock(higher);
>  vn_unlock(higher);
>  goto retry;
> }
> 
> Note this also eliminates the pause.

I disagree.  It will conflict with normal locking order with 50% probability.
My code switches between two orders, eventually getting out of this situation.
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r367631 - in head/sys: kern sys

2020-11-13 Thread Konstantin Belousov
On Fri, Nov 13, 2020 at 08:24:30AM -0800, Conrad Meyer wrote:
> Hi Konstantin,
> 
> On Fri, Nov 13, 2020 at 1:32 AM Konstantin Belousov  wrote:
> >
> > Author: kib
> > Date: Fri Nov 13 09:31:57 2020
> > New Revision: 367631
> > URL: https://svnweb.freebsd.org/changeset/base/367631
> >
> > Log:
> >   Implement vn_lock_pair().
> >
> > Modified: head/sys/kern/vfs_vnops.c
> > ==
> > --- head/sys/kern/vfs_vnops.c   Fri Nov 13 02:05:45 2020(r367630)
> > +++ head/sys/kern/vfs_vnops.c   Fri Nov 13 09:31:57 2020(r367631)
> > @@ -3317,4 +3325,92 @@ vn_fallocate(struct file *fp, off_t offset, off_t 
> > len,
> > ...
> > +
> > +static void
> > +vn_lock_pair_pause(const char *wmesg)
> > +{
> > +   atomic_add_long(&vn_lock_pair_pause_cnt, 1);
> > +   pause(wmesg, prng32_bounded(hz / 10));
> > +}
> 
> This function is called when the try-lock of the second lock in the
> pair (either order) fails.  The back-off period is up to 100ms,
> expected average 50ms.  That seems really high?
It is called only in deadlock avoidance case, where we already have to drop
to slow path for other reasons (this is coming in the patch that I hope to
commit today).

My selection of numbers were driven by more or less realistic estimates
of the vnode lock ownership time for sync io on very busy HDD, there was
a short discussion of it with Mark in review.

> 
> Separately: prng32_bounded() may return 0, which is transparently
> converted to the equivalent of 1 by pause_sbt(9).  This means a 1 tick
> pause is marginally more likely than any other possible duration.  It
> probably doesn't matter.
I do not quite understand the point.  Do you want the 0->1 conversion
be explicit there ?  Or something else ?
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r367631 - in head/sys: kern sys

2020-11-13 Thread Mateusz Guzik
On 11/13/20, Konstantin Belousov  wrote:
> +static u_long vn_lock_pair_pause_cnt;
> +SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD,
> +&vn_lock_pair_pause_cnt, 0,
> +"Count of vn_lock_pair deadlocks");
> +
> +static void
> +vn_lock_pair_pause(const char *wmesg)
> +{
> + atomic_add_long(&vn_lock_pair_pause_cnt, 1);
> + pause(wmesg, prng32_bounded(hz / 10));
> +}
> +
> +/*
> + * Lock pair of vnodes vp1, vp2, avoiding lock order reversal.
> + * vp1_locked indicates whether vp1 is exclusively locked; if not, vp1
> + * must be unlocked.  Same for vp2 and vp2_locked.  One of the vnodes
> + * can be NULL.
> + *
> + * The function returns with both vnodes exclusively locked, and
> + * guarantees that it does not create lock order reversal with other
> + * threads during its execution.  Both vnodes could be unlocked
> + * temporary (and reclaimed).
> + */
> +void
> +vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
> +bool vp2_locked)
> +{
> + int error;
> +
> + if (vp1 == NULL && vp2 == NULL)
> + return;
> + if (vp1 != NULL) {
> + if (vp1_locked)
> + ASSERT_VOP_ELOCKED(vp1, "vp1");
> + else
> + ASSERT_VOP_UNLOCKED(vp1, "vp1");
> + } else {
> + vp1_locked = true;
> + }
> + if (vp2 != NULL) {
> + if (vp2_locked)
> + ASSERT_VOP_ELOCKED(vp2, "vp2");
> + else
> + ASSERT_VOP_UNLOCKED(vp2, "vp2");
> + } else {
> + vp2_locked = true;
> + }
> + if (!vp1_locked && !vp2_locked) {
> + vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
> + vp1_locked = true;
> + }
> +
> + for (;;) {
> + if (vp1_locked && vp2_locked)
> + break;
> + if (vp1_locked && vp2 != NULL) {
> + if (vp1 != NULL) {
> + error = VOP_LOCK1(vp2, LK_EXCLUSIVE | LK_NOWAIT,
> + __FILE__, __LINE__);
> + if (error == 0)
> + break;
> + VOP_UNLOCK(vp1);
> + vp1_locked = false;
> + vn_lock_pair_pause("vlp1");
> + }
> + vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY);
> + vp2_locked = true;
> + }
> + if (vp2_locked && vp1 != NULL) {
> + if (vp2 != NULL) {
> + error = VOP_LOCK1(vp1, LK_EXCLUSIVE | LK_NOWAIT,
> + __FILE__, __LINE__);
> + if (error == 0)
> + break;
> + VOP_UNLOCK(vp2);
> + vp2_locked = false;
> + vn_lock_pair_pause("vlp2");
> + }
> + vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
> + vp1_locked = true;
> + }
> + }
> + if (vp1 != NULL)
> + ASSERT_VOP_ELOCKED(vp1, "vp1 ret");
> + if (vp2 != NULL)
> + ASSERT_VOP_ELOCKED(vp2, "vp2 ret");
>  }
>

Multiple callers who get here with flipped addresses can end up
failing against each other in an indefinite loop.

Instead, after initial trylocking fails, the routine should establish
ordering it will follow for itself, for example by sorting by address.

Then pseudo-code would be:
retry:
vn_lock(lower, ...);
if (!VOP_LOCK(higher, LK_NOWAIT ...)) {
 vn_unlock(lower);
 vn_lock(higher);
 vn_unlock(higher);
 goto retry;
}

Note this also eliminates the pause.

-- 
Mateusz Guzik 
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r367631 - in head/sys: kern sys

2020-11-13 Thread Conrad Meyer
Hi Konstantin,

On Fri, Nov 13, 2020 at 1:32 AM Konstantin Belousov  wrote:
>
> Author: kib
> Date: Fri Nov 13 09:31:57 2020
> New Revision: 367631
> URL: https://svnweb.freebsd.org/changeset/base/367631
>
> Log:
>   Implement vn_lock_pair().
>
> Modified: head/sys/kern/vfs_vnops.c
> ==
> --- head/sys/kern/vfs_vnops.c   Fri Nov 13 02:05:45 2020(r367630)
> +++ head/sys/kern/vfs_vnops.c   Fri Nov 13 09:31:57 2020(r367631)
> @@ -3317,4 +3325,92 @@ vn_fallocate(struct file *fp, off_t offset, off_t len,
> ...
> +
> +static void
> +vn_lock_pair_pause(const char *wmesg)
> +{
> > +   atomic_add_long(&vn_lock_pair_pause_cnt, 1);
> +   pause(wmesg, prng32_bounded(hz / 10));
> +}

This function is called when the try-lock of the second lock in the
pair (either order) fails.  The back-off period is up to 100ms,
expected average 50ms.  That seems really high?

Separately: prng32_bounded() may return 0, which is transparently
converted to the equivalent of 1 by pause_sbt(9).  This means a 1 tick
pause is marginally more likely than any other possible duration.  It
probably doesn't matter.

Thanks,
Conrad

> +
> +/*
> + * Lock pair of vnodes vp1, vp2, avoiding lock order reversal.
> + * vp1_locked indicates whether vp1 is exclusively locked; if not, vp1
> + * must be unlocked.  Same for vp2 and vp2_locked.  One of the vnodes
> + * can be NULL.
> + *
> + * The function returns with both vnodes exclusively locked, and
> + * guarantees that it does not create lock order reversal with other
> + * threads during its execution.  Both vnodes could be unlocked
> + * temporary (and reclaimed).
> + */
> +void
> +vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
> +bool vp2_locked)
> +{
> +   int error;
> +
> +   if (vp1 == NULL && vp2 == NULL)
> +   return;
> +   if (vp1 != NULL) {
> +   if (vp1_locked)
> +   ASSERT_VOP_ELOCKED(vp1, "vp1");
> +   else
> +   ASSERT_VOP_UNLOCKED(vp1, "vp1");
> +   } else {
> +   vp1_locked = true;
> +   }
> +   if (vp2 != NULL) {
> +   if (vp2_locked)
> +   ASSERT_VOP_ELOCKED(vp2, "vp2");
> +   else
> +   ASSERT_VOP_UNLOCKED(vp2, "vp2");
> +   } else {
> +   vp2_locked = true;
> +   }
> +   if (!vp1_locked && !vp2_locked) {
> +   vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
> +   vp1_locked = true;
> +   }
> +
> +   for (;;) {
> +   if (vp1_locked && vp2_locked)
> +   break;
> +   if (vp1_locked && vp2 != NULL) {
> +   if (vp1 != NULL) {
> +   error = VOP_LOCK1(vp2, LK_EXCLUSIVE | 
> LK_NOWAIT,
> +   __FILE__, __LINE__);
> +   if (error == 0)
> +   break;
> +   VOP_UNLOCK(vp1);
> +   vp1_locked = false;
> +   vn_lock_pair_pause("vlp1");

(Pause called here and in similar elided case for vp2 -> vp1 below.)

> +   }
> +   vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY);
> +   vp2_locked = true;
> +   }
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r367631 - in head/sys: kern sys

2020-11-13 Thread Konstantin Belousov
On Fri, Nov 13, 2020 at 09:31:58AM +, Konstantin Belousov wrote:
> Author: kib
> Date: Fri Nov 13 09:31:57 2020
> New Revision: 367631
> URL: https://svnweb.freebsd.org/changeset/base/367631
> 
> Log:
>   Implement vn_lock_pair().
>   
>   In collaboration with:  pho
>   Reviewed by:mckusick (previous version), markj (previous version)
>   Tested by:  markj (syzkaller), pho
>   Sponsored by:   The FreeBSD Foundation
>   Differential revision:  https://reviews.freebsd.org/D26136
> 
> Modified:
>   head/sys/kern/vfs_vnops.c
>   head/sys/sys/vnode.h
> 
> Modified: head/sys/kern/vfs_vnops.c
> ==
> --- head/sys/kern/vfs_vnops.c Fri Nov 13 02:05:45 2020(r367630)
> +++ head/sys/kern/vfs_vnops.c Fri Nov 13 09:31:57 2020(r367631)
> @@ -275,6 +276,10 @@ restart:
>   vn_finished_write(mp);
>   if (error) {
>   NDFREE(ndp, NDF_ONLY_PNBUF);
> + if (error == ERELOOKUP) {
> + NDREINIT(ndp);
> + goto restart;
> + }
>   return (error);
>   }
>   fmode &= ~O_TRUNC;
> @@ -1524,6 +1529,7 @@ vn_truncate(struct file *fp, off_t length, struct ucre
>  
>   vp = fp->f_vnode;
>  
> +retry:
>   /*
>* Lock the whole range for truncation.  Otherwise split i/o
>* might happen partly before and partly after the truncation.
> @@ -1550,6 +1556,8 @@ out:
>   vn_finished_write(mp);
>  out1:
>   vn_rangelock_unlock(vp, rl_cookie);
> + if (error == ERELOOKUP)
> + goto retry;
>   return (error);
>  }
These chunks really belong to r367632 and not r367631, they leaked there
due to my mishandling of the split.  I am sorry for that, but I do not think
it is reasonable to revert and re-commit.
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r367631 - in head/sys: kern sys

2020-11-13 Thread Konstantin Belousov
Author: kib
Date: Fri Nov 13 09:31:57 2020
New Revision: 367631
URL: https://svnweb.freebsd.org/changeset/base/367631

Log:
  Implement vn_lock_pair().
  
  In collaboration with:pho
  Reviewed by:  mckusick (previous version), markj (previous version)
  Tested by:markj (syzkaller), pho
  Sponsored by: The FreeBSD Foundation
  Differential revision:https://reviews.freebsd.org/D26136

Modified:
  head/sys/kern/vfs_vnops.c
  head/sys/sys/vnode.h

Modified: head/sys/kern/vfs_vnops.c
==
--- head/sys/kern/vfs_vnops.c   Fri Nov 13 02:05:45 2020(r367630)
+++ head/sys/kern/vfs_vnops.c   Fri Nov 13 09:31:57 2020(r367631)
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -275,6 +276,10 @@ restart:
vn_finished_write(mp);
if (error) {
NDFREE(ndp, NDF_ONLY_PNBUF);
+   if (error == ERELOOKUP) {
+   NDREINIT(ndp);
+   goto restart;
+   }
return (error);
}
fmode &= ~O_TRUNC;
@@ -1524,6 +1529,7 @@ vn_truncate(struct file *fp, off_t length, struct ucre
 
vp = fp->f_vnode;
 
+retry:
/*
 * Lock the whole range for truncation.  Otherwise split i/o
 * might happen partly before and partly after the truncation.
@@ -1550,6 +1556,8 @@ out:
vn_finished_write(mp);
 out1:
vn_rangelock_unlock(vp, rl_cookie);
+   if (error == ERELOOKUP)
+   goto retry;
return (error);
 }
 
@@ -3317,4 +3325,92 @@ vn_fallocate(struct file *fp, off_t offset, off_t len,
}
 
return (error);
+}
+
+static u_long vn_lock_pair_pause_cnt;
+SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD,
+&vn_lock_pair_pause_cnt, 0,
+"Count of vn_lock_pair deadlocks");
+
+static void
+vn_lock_pair_pause(const char *wmesg)
+{
+   atomic_add_long(&vn_lock_pair_pause_cnt, 1);
+   pause(wmesg, prng32_bounded(hz / 10));
+}
+
+/*
+ * Lock pair of vnodes vp1, vp2, avoiding lock order reversal.
+ * vp1_locked indicates whether vp1 is exclusively locked; if not, vp1
+ * must be unlocked.  Same for vp2 and vp2_locked.  One of the vnodes
+ * can be NULL.
+ *
+ * The function returns with both vnodes exclusively locked, and
+ * guarantees that it does not create lock order reversal with other
+ * threads during its execution.  Both vnodes could be unlocked
+ * temporary (and reclaimed).
+ */
+void
+vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
+bool vp2_locked)
+{
+   int error;
+
+   if (vp1 == NULL && vp2 == NULL)
+   return;
+   if (vp1 != NULL) {
+   if (vp1_locked)
+   ASSERT_VOP_ELOCKED(vp1, "vp1");
+   else
+   ASSERT_VOP_UNLOCKED(vp1, "vp1");
+   } else {
+   vp1_locked = true;
+   }
+   if (vp2 != NULL) {
+   if (vp2_locked)
+   ASSERT_VOP_ELOCKED(vp2, "vp2");
+   else
+   ASSERT_VOP_UNLOCKED(vp2, "vp2");
+   } else {
+   vp2_locked = true;
+   }
+   if (!vp1_locked && !vp2_locked) {
+   vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
+   vp1_locked = true;
+   }
+
+   for (;;) {
+   if (vp1_locked && vp2_locked)
+   break;
+   if (vp1_locked && vp2 != NULL) {
+   if (vp1 != NULL) {
+   error = VOP_LOCK1(vp2, LK_EXCLUSIVE | LK_NOWAIT,
+   __FILE__, __LINE__);
+   if (error == 0)
+   break;
+   VOP_UNLOCK(vp1);
+   vp1_locked = false;
+   vn_lock_pair_pause("vlp1");
+   }
+   vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY);
+   vp2_locked = true;
+   }
+   if (vp2_locked && vp1 != NULL) {
+   if (vp2 != NULL) {
+   error = VOP_LOCK1(vp1, LK_EXCLUSIVE | LK_NOWAIT,
+   __FILE__, __LINE__);
+   if (error == 0)
+   break;
+   VOP_UNLOCK(vp2);
+   vp2_locked = false;
+   vn_lock_pair_pause("vlp2");
+   }
+   vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
+   vp1_locked = true;
+   }
+   }
+   if (vp1 != NULL)
+