Re: svn commit: r306224 - head/sys/kern

2016-11-12 Thread Adrian Chadd
This causes the AR9331 / Carambola2 kernel to just plainly not start. :(



-adrian


On 22 September 2016 at 21:45, Mateusz Guzik wrote:
> Author: mjg
> Date: Fri Sep 23 04:45:11 2016
> New Revision: 306224
> URL: https://svnweb.freebsd.org/changeset/base/306224
>
> Log:
>   cache: get rid of the global lock
>
>   Add a table of vnode locks and use them along with bucketlocks to provide
>   concurrent modification support. The approach taken is to preserve the
>   current behaviour of the namecache and just lock all relevant parts before
>   any changes are made.
>
>   Lookups still require the relevant bucket to be locked.
>
> Discussed with:	kib
> Tested by:	pho
>
> Modified:
>   head/sys/kern/subr_witness.c
>   head/sys/kern/vfs_cache.c
>

svn commit: r306224 - head/sys/kern

2016-09-22 Thread Mateusz Guzik
Author: mjg
Date: Fri Sep 23 04:45:11 2016
New Revision: 306224
URL: https://svnweb.freebsd.org/changeset/base/306224

Log:
  cache: get rid of the global lock
  
  Add a table of vnode locks and use them along with bucketlocks to provide
  concurrent modification support. The approach taken is to preserve the
  current behaviour of the namecache and just lock all relevant parts before
  any changes are made.
  
  Lookups still require the relevant bucket to be locked.
  
  Discussed with:	kib
  Tested by:	pho

Modified:
  head/sys/kern/subr_witness.c
  head/sys/kern/vfs_cache.c

Modified: head/sys/kern/subr_witness.c
==============================================================================
--- head/sys/kern/subr_witness.c	Fri Sep 23 03:21:40 2016	(r306223)
+++ head/sys/kern/subr_witness.c	Fri Sep 23 04:45:11 2016	(r306224)
@@ -625,7 +625,7 @@ static struct witness_order_list_entry o
 	/*
 	 * VFS namecache
 	 */
-	{ "ncglobal", &lock_class_rw },
+	{ "ncvn", &lock_class_mtx_sleep },
 	{ "ncbuc", &lock_class_rw },
 	{ "vnode interlock", &lock_class_mtx_sleep },
 	{ "ncneg", &lock_class_mtx_sleep },

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c	Fri Sep 23 03:21:40 2016	(r306223)
+++ head/sys/kern/vfs_cache.c	Fri Sep 23 04:45:11 2016	(r306224)
@@ -151,21 +151,35 @@ struct namecache_ts {
  * name is located in the cache, it will be dropped.
  *
  * These locks are used (in the order in which they can be taken):
- * NAME         TYPE    ROLE
- * cache_lock   rwlock  global, needed for all modifications
- * bucketlock   rwlock  for access to given hash bucket
- * ncneg_mtx    mtx     negative entry LRU management
+ * NAME         TYPE    ROLE
+ * vnodelock    mtx     vnode lists and v_cache_dd field protection
+ * bucketlock   rwlock  for access to given set of hash buckets
+ * ncneg_mtx    mtx     negative entry LRU management
  *
- * A name -> vnode lookup can be safely performed by either locking cache_lock
- * or the relevant hash bucket.
+ * Additionally, ncneg_shrink_lock mtx is used to have at most one thread
+ * shrinking the LRU list.
  *
- * ".." and vnode -> name lookups require cache_lock.
+ * It is legal to take multiple vnodelock and bucketlock locks. The locking
+ * order is lower address first. Both are recursive.
  *
- * Modifications require both cache_lock and relevant bucketlock taken for
- * writing.
+ * "." lookups are lockless.
  *
- * Negative entry LRU management requires ncneg_mtx taken on top of either
- * cache_lock or bucketlock.
+ * ".." and vnode -> name lookups require vnodelock.
+ *
+ * name -> vnode lookup requires the relevant bucketlock to be held for reading.
+ *
+ * Insertions and removals of entries require involved vnodes and bucketlocks
+ * to be write-locked to prevent other threads from seeing the entry.
+ *
+ * Some lookups result in removal of the found entry (e.g. getting rid of a
+ * negative entry with the intent to create a positive one), which poses a
+ * problem when multiple threads reach the state. Similarly, two different
+ * threads can purge two different vnodes and try to remove the same name.
+ *
+ * If the already held vnode lock is lower than the second required lock, we
+ * can just take the other lock. However, in the opposite case, this could
+ * deadlock. As such, this is resolved by trylocking and if that fails unlocking
+ * the first node, locking everything in order and revalidating the state.
  */
 
 /*
@@ -196,15 +210,9 @@ SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor
 
 struct nchstats	nchstats;	/* cache effectiveness statistics */
 
-static struct rwlock cache_lock;
-RW_SYSINIT(vfscache, &cache_lock, "ncglobal");
-
-#define	CACHE_TRY_WLOCK()	rw_try_wlock(&cache_lock)
-#define	CACHE_UPGRADE_LOCK()	rw_try_upgrade(&cache_lock)
-#define	CACHE_RLOCK()		rw_rlock(&cache_lock)
-#define	CACHE_RUNLOCK()		rw_runlock(&cache_lock)
-#define	CACHE_WLOCK()		rw_wlock(&cache_lock)
-#define	CACHE_WUNLOCK()		rw_wunlock(&cache_lock)
+static struct mtx	ncneg_shrink_lock;
+MTX_SYSINIT(vfscache_shrink_neg, &ncneg_shrink_lock, "Name Cache shrink neg",
+    MTX_DEF);
 
 static struct mtx_padalign ncneg_mtx;
MTX_SYSINIT(vfscache_neg, &ncneg_mtx, "ncneg", MTX_DEF);
@@ -214,6 +222,19 @@ static struct rwlock_padalign  *bucketlo
 #define	HASH2BUCKETLOCK(hash) \
 	((struct rwlock *)(&bucketlocks[((hash) % numbucketlocks)]))
 
+static u_int   numvnodelocks;
+static struct mtx *vnodelocks;
+static inline struct mtx *
+VP2VNODELOCK(struct vnode *vp)
+{
+   struct mtx *vlp;
+
+   if (vp == NULL)
+   return (NULL);
+	vlp = &vnodelocks[(((uintptr_t)(vp) >> 8) % numvnodelocks)];
+   return (vlp);
+}
+
 /*
  * UMA zones for the VFS cache.
  *
@@ -329,19 +350,49 @@ STATNODE_COUNTER(numfullpathfail2,
 "Number