Re: [PATCH] mm/z3fold.c: Fix race between migration and destruction
Hi Henry,

On Fri, Aug 9, 2019 at 6:46 PM Henry Burns wrote:
>
> In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq).
> However, we have no guarantee that migration isn't happening in the
> background at that time.
>
> Migration directly calls queue_work_on(pool->compact_wq); if destruction
> wins that race, we are using a destroyed workqueue.

Thanks for the fix. Would you please comment why adding flush_workqueue()
isn't enough?

~Vitaly

> Signed-off-by: Henry Burns
> ---
>  mm/z3fold.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
>
> [...]
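A note on the question above: flush_workqueue() only waits for work that is
already queued; it cannot prevent a migration that is still in flight from
queueing new work afterwards. A hypothetical interleaving, reconstructed from
the commit message rather than taken from the thread, shows the window that
would remain if destruction merely flushed:

  CPU 0 (z3fold_destroy_pool)           CPU 1 (z3fold_page_migrate)
  ---------------------------           ---------------------------
  flush_workqueue(pool->compact_wq)
                                        /* migration already isolated a page
                                           and is still running */
  destroy_workqueue(pool->compact_wq)
                                        queue_work_on(new_zhdr->cpu,
                                                      pool->compact_wq, ...)
                                        /* use of a destroyed workqueue */

This is the window the isolated_pages counter and isolate_wait waitqueue in
the patch are meant to close.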
Re: [PATCH] mm/z3fold.c: Fix race between migration and destruction
I've just CC'd a personal email here so that I can respond to any replies
after today.

On Fri, Aug 9, 2019 at 9:46 AM Henry Burns wrote:
>
> In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq).
> However, we have no guarantee that migration isn't happening in the
> background at that time.
>
> Migration directly calls queue_work_on(pool->compact_wq); if destruction
> wins that race, we are using a destroyed workqueue.
>
> Signed-off-by: Henry Burns
> ---
>  mm/z3fold.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
>
> [...]
[PATCH] mm/z3fold.c: Fix race between migration and destruction
In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq).
However, we have no guarantee that migration isn't happening in the
background at that time.

Migration directly calls queue_work_on(pool->compact_wq); if destruction
wins that race, we are using a destroyed workqueue.

Signed-off-by: Henry Burns
---
 mm/z3fold.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 78447cecfffa..e136d97ce56e 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -40,6 +40,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
 #include <linux/zpool.h>
 
 /*
@@ -161,8 +162,10 @@ struct z3fold_pool {
 	const struct zpool_ops *zpool_ops;
 	struct workqueue_struct *compact_wq;
 	struct workqueue_struct *release_wq;
+	struct wait_queue_head isolate_wait;
 	struct work_struct work;
 	struct inode *inode;
+	int isolated_pages;
 };
 
 /*
@@ -772,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 		goto out_c;
 	spin_lock_init(&pool->lock);
 	spin_lock_init(&pool->stale_lock);
+	init_waitqueue_head(&pool->isolate_wait);
 	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
 	if (!pool->unbuddied)
 		goto out_pool;
@@ -811,6 +815,15 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 	return NULL;
 }
 
+static bool pool_isolated_are_drained(struct z3fold_pool *pool)
+{
+	bool ret;
+
+	spin_lock(&pool->lock);
+	ret = pool->isolated_pages == 0;
+	spin_unlock(&pool->lock);
+	return ret;
+}
 /**
  * z3fold_destroy_pool() - destroys an existing z3fold pool
  * @pool:	the z3fold pool to be destroyed
@@ -821,6 +834,13 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
 	kmem_cache_destroy(pool->c_handle);
 
+	/*
+	 * We need to ensure that no pages are being migrated while we destroy
+	 * these workqueues, as migration can queue work on either of the
+	 * workqueues.
+	 */
+	wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
+
 	/*
 	 * We need to destroy pool->compact_wq before pool->release_wq,
 	 * as any pending work on pool->compact_wq will call
@@ -1317,6 +1337,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
 	return atomic64_read(&pool->pages_nr);
 }
 
+/*
+ * z3fold_dec_isolated() expects to be called while pool->lock is held.
+ */
+static void z3fold_dec_isolated(struct z3fold_pool *pool)
+{
+	assert_spin_locked(&pool->lock);
+	VM_BUG_ON(pool->isolated_pages <= 0);
+	pool->isolated_pages--;
+
+	/*
+	 * If we have no more isolated pages, we have to see if
+	 * z3fold_destroy_pool() is waiting for a signal.
+	 */
+	if (pool->isolated_pages == 0 && waitqueue_active(&pool->isolate_wait))
+		wake_up_all(&pool->isolate_wait);
+}
+
+static void z3fold_inc_isolated(struct z3fold_pool *pool)
+{
+	pool->isolated_pages++;
+}
+
 static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 {
 	struct z3fold_header *zhdr;
@@ -1343,6 +1385,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 		spin_lock(&pool->lock);
 		if (!list_empty(&page->lru))
 			list_del(&page->lru);
+		z3fold_inc_isolated(pool);
 		spin_unlock(&pool->lock);
 		z3fold_page_unlock(zhdr);
 		return true;
@@ -1417,6 +1460,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
 
 	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
+	spin_lock(&pool->lock);
+	z3fold_dec_isolated(pool);
+	spin_unlock(&pool->lock);
+
 	page_mapcount_reset(page);
 	put_page(page);
 	return 0;
@@ -1436,10 +1483,14 @@ static void z3fold_page_putback(struct page *page)
 	INIT_LIST_HEAD(&page->lru);
 	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
 		atomic64_dec(&pool->pages_nr);
+		spin_lock(&pool->lock);
+		z3fold_dec_isolated(pool);
+		spin_unlock(&pool->lock);
 		return;
 	}
 	spin_lock(&pool->lock);
 	list_add(&page->lru, &pool->lru);
+	z3fold_dec_isolated(pool);
 	spin_unlock(&pool->lock);
 	z3fold_page_unlock(zhdr);
 }
-- 
2.22.0.770.g0f2c4a37fd-goog
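For readers who want to see the drain pattern in isolation: below is a
minimal, self-contained userspace sketch of the scheme the patch adds, with a
pthread mutex and condition variable standing in for pool->lock and the
isolate_wait waitqueue (wait_event()/wake_up_all() have no direct userspace
equivalent). The file name, function names, and the small driver in main()
are illustrative assumptions, not kernel code.

/* drain_demo.c -- illustrative only; build with: cc drain_demo.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t isolate_wait = PTHREAD_COND_INITIALIZER;
static int isolated_pages;

/* cf. z3fold_inc_isolated(): bump the count when a page is isolated */
static void inc_isolated(void)
{
	pthread_mutex_lock(&lock);
	isolated_pages++;
	pthread_mutex_unlock(&lock);
}

/* cf. z3fold_dec_isolated(): drop the count and, when the last in-flight
 * migration finishes, wake the destroyer (cf. wake_up_all()) */
static void dec_isolated(void)
{
	pthread_mutex_lock(&lock);
	if (--isolated_pages == 0)
		pthread_cond_broadcast(&isolate_wait);
	pthread_mutex_unlock(&lock);
}

/* cf. z3fold_destroy_pool(): block until no isolated pages remain, so it
 * is safe to tear down the workqueues (cf. wait_event()) */
static void destroy_pool(void)
{
	pthread_mutex_lock(&lock);
	while (isolated_pages > 0)
		pthread_cond_wait(&isolate_wait, &lock);
	pthread_mutex_unlock(&lock);
	printf("drained; safe to destroy workqueues\n");
}

/* stand-in for a migration whose page was isolated before teardown began */
static void *migrate(void *arg)
{
	usleep(100 * 1000);	/* the migration itself */
	dec_isolated();
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	int i;

	for (i = 0; i < 4; i++)
		inc_isolated();	/* cf. z3fold_page_isolate() */
	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, migrate, NULL);
	destroy_pool();		/* returns only after all four have drained */
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}

The condition-variable loop plays the role of wait_event() here: in this
sketch destroy_pool() cannot return while any counted migration is still in
flight, which is the invariant the patch relies on before calling
destroy_workqueue().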