Re: [PATCH] mm/z3fold.c: Fix race between migration and destruction

2019-08-10 Thread Vitaly Wool
Hi Henry,

On Fri, Aug 9, 2019 at 6:46 PM Henry Burns wrote:
>
> In z3fold_destroy_pool() we call destroy_workqueue(pool->compact_wq).
> However, we have no guarantee that migration isn't happening in the
> background at that time.
>
> Migration directly calls queue_work_on(pool->compact_wq); if destruction
> wins that race, we are using a destroyed workqueue.


Thanks for the fix. Would you please comment on why adding
flush_workqueue() isn't enough?

~Vitaly
> [full patch snipped; it is quoted in the original posting below]


Re: [PATCH] mm/z3fold.c: Fix race between migration and destruction

2019-08-09 Thread Henry Burns
I've just CC'd a personal email here so that I can respond to any
replies after today.

On Fri, Aug 9, 2019 at 9:46 AM Henry Burns wrote:
> [full patch snipped; see the original posting below]


[PATCH] mm/z3fold.c: Fix race between migration and destruction

2019-08-09 Thread Henry Burns
In z3fold_destroy_pool() we call destroy_workqueue(pool->compact_wq).
However, we have no guarantee that migration isn't happening in the
background at that time.

Migration directly calls queue_work_on(pool->compact_wq); if destruction
wins that race, we are using a destroyed workqueue.
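
An interleaving that triggers the bug might look like this (illustrative;
both call paths are in mm/z3fold.c):

  CPU 0 (migration)                     CPU 1 (pool teardown)
  z3fold_page_migrate()                 z3fold_destroy_pool()
                                          destroy_workqueue(pool->compact_wq)
    queue_work_on(new_zhdr->cpu,
                  pool->compact_wq, ...)    <- queues work on a destroyed
                                               workqueue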

Signed-off-by: Henry Burns 
---
 mm/z3fold.c | 51 +++
 1 file changed, 51 insertions(+)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 78447cecfffa..e136d97ce56e 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -40,6 +40,7 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/wait.h>
#include <linux/zpool.h>
 
 /*
@@ -161,8 +162,10 @@ struct z3fold_pool {
const struct zpool_ops *zpool_ops;
struct workqueue_struct *compact_wq;
struct workqueue_struct *release_wq;
+   struct wait_queue_head isolate_wait;
struct work_struct work;
struct inode *inode;
+   int isolated_pages;
 };
 
 /*
@@ -772,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
goto out_c;
spin_lock_init(&pool->lock);
spin_lock_init(&pool->stale_lock);
+   init_waitqueue_head(&pool->isolate_wait);
pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
if (!pool->unbuddied)
goto out_pool;
@@ -811,6 +815,15 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
return NULL;
 }
 
+static bool pool_isolated_are_drained(struct z3fold_pool *pool)
+{
+   bool ret;
+
+   spin_lock(&pool->lock);
+   ret = pool->isolated_pages == 0;
+   spin_unlock(&pool->lock);
+   return ret;
+}
 /**
  * z3fold_destroy_pool() - destroys an existing z3fold pool
  * @pool:  the z3fold pool to be destroyed
@@ -821,6 +834,13 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
kmem_cache_destroy(pool->c_handle);
 
+   /*
+    * We need to ensure that no pages are being migrated while we destroy
+    * these workqueues, as migration can queue work on either of the
+    * workqueues.
+    */
+   wait_event(pool->isolate_wait, pool_isolated_are_drained(pool));
+
/*
 * We need to destroy pool->compact_wq before pool->release_wq,
 * as any pending work on pool->compact_wq will call
@@ -1317,6 +1337,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
return atomic64_read(&pool->pages_nr);
 }
 
+/*
+ * z3fold_dec_isolated() expects to be called while pool->lock is held.
+ */
+static void z3fold_dec_isolated(struct z3fold_pool *pool)
+{
+   assert_spin_locked(&pool->lock);
+   VM_BUG_ON(pool->isolated_pages <= 0);
+   pool->isolated_pages--;
+
+   /*
+    * If we have no more isolated pages, we have to see if
+    * z3fold_destroy_pool() is waiting for a signal.
+    */
+   if (pool->isolated_pages == 0 && waitqueue_active(&pool->isolate_wait))
+       wake_up_all(&pool->isolate_wait);
+}
+
+static void z3fold_inc_isolated(struct z3fold_pool *pool)
+{
+   pool->isolated_pages++;
+}
+
 static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 {
struct z3fold_header *zhdr;
@@ -1343,6 +1385,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
spin_lock(&pool->lock);
if (!list_empty(&page->lru))
    list_del(&page->lru);
+   z3fold_inc_isolated(pool);
spin_unlock(&pool->lock);
z3fold_page_unlock(zhdr);
return true;
@@ -1417,6 +1460,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 
queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
+   spin_lock(&pool->lock);
+   z3fold_dec_isolated(pool);
+   spin_unlock(&pool->lock);
+
page_mapcount_reset(page);
put_page(page);
return 0;
@@ -1436,10 +1483,14 @@ static void z3fold_page_putback(struct page *page)
INIT_LIST_HEAD(&page->lru);
if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
atomic64_dec(&pool->pages_nr);
+   spin_lock(&pool->lock);
+   z3fold_dec_isolated(pool);
+   spin_unlock(&pool->lock);
return;
}
spin_lock(&pool->lock);
list_add(&page->lru, &pool->lru);
+   z3fold_dec_isolated(pool);
spin_unlock(&pool->lock);
z3fold_page_unlock(zhdr);
 }
-- 
2.22.0.770.g0f2c4a37fd-goog
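
The fix amounts to a drain-then-destroy handshake: z3fold_page_isolate()
increments isolated_pages under pool->lock, every path that ends an
isolation (migration completing, or putback) decrements it and wakes any
waiter, and z3fold_destroy_pool() blocks until the counter reaches zero.
Below is a minimal userspace C analogue of that handshake; the pthread
mutex and condition variable stand in for pool->lock, pool->isolate_wait
and wait_event()/wake_up_all(), and every name in the sketch is
hypothetical rather than kernel API.

#include <pthread.h>
#include <stdio.h>

/* Hypothetical userspace stand-in for the pool state the patch adds. */
struct pool {
	pthread_mutex_t lock;         /* plays the role of pool->lock */
	pthread_cond_t isolate_wait;  /* plays the role of pool->isolate_wait */
	int isolated_pages;
};

/* Analogue of z3fold_inc_isolated(): a page has been isolated. */
static void inc_isolated(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	p->isolated_pages++;
	pthread_mutex_unlock(&p->lock);
}

/* Analogue of z3fold_dec_isolated(): wake the destroyer once drained. */
static void dec_isolated(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	if (--p->isolated_pages == 0)
		pthread_cond_broadcast(&p->isolate_wait); /* wake_up_all() analogue */
	pthread_mutex_unlock(&p->lock);
}

/* Analogue of the wait_event() added to z3fold_destroy_pool(). */
static void destroy_pool(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	while (p->isolated_pages != 0)
		pthread_cond_wait(&p->isolate_wait, &p->lock);
	pthread_mutex_unlock(&p->lock);
	/* No isolation in flight: safe to tear down the workqueues here. */
	printf("drained, tearing down\n");
}

int main(void)
{
	struct pool p = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.isolate_wait = PTHREAD_COND_INITIALIZER,
		.isolated_pages = 0,
	};

	inc_isolated(&p);  /* z3fold_page_isolate() succeeds */
	dec_isolated(&p);  /* migration or putback finishes */
	destroy_pool(&p);  /* returns only once the counter has drained */
	return 0;
}

The while loop around pthread_cond_wait() re-checks the counter on every
wakeup, just as wait_event() re-evaluates pool_isolated_are_drained() in
the patch; the waitqueue_active() check in z3fold_dec_isolated() is only
an optimization that skips the wakeup when no destroyer is sleeping.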