On Mon, Apr 05, 2021 at 04:00:42PM -0700, Mike Kravetz wrote:
> Commit c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in
> non-task context") was added to address the issue of free_huge_page
> being called from irq context. That commit hands off free_huge_page
> processing to a workqueue if !in_task. However, this doesn't cover
> all the cases as pointed out by 0day bot lockdep report [1].
>
> : Possible interrupt unsafe locking scenario:
> :
> :        CPU0                    CPU1
> :        ----                    ----
> :   lock(hugetlb_lock);
> :                                local_irq_disable();
> :                                lock(slock-AF_INET);
> :                                lock(hugetlb_lock);
> :   <Interrupt>
> :     lock(slock-AF_INET);
>
> Shakeel has later explained that this is very likely TCP TX zerocopy
> from hugetlb pages scenario when the networking code drops a last
> reference to hugetlb page while having IRQ disabled. Hugetlb freeing
> path doesn't disable IRQ while holding hugetlb_lock so a lock dependency
> chain can lead to a deadlock.
>
> This commit addresses the issue by doing the following:
> - Make hugetlb_lock irq safe. This is mostly a simple process of
> changing spin_*lock calls to spin_*lock_irq* calls.
> - Make subpool lock irq safe in a similar manner.
> - Revert the !in_task check and workqueue handoff.
>
> [1] https://lore.kernel.org/linux-mm/f1c03b05bc43a...@google.com/
>
> Signed-off-by: Mike Kravetz
> Acked-by: Michal Hocko
> Reviewed-by: Muchun Song
So, spin_lock_irqsave/spin_unlock_irqrestore is to be used in places
that might have been called from an IRQ context?
Looks good to me:
Reviewed-by: Oscar Salvador
> ---
> mm/hugetlb.c| 183 +---
> mm/hugetlb_cgroup.c | 8 +-
> 2 files changed, 74 insertions(+), 117 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 93a2a11b9376..15b6e7aadb52 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -93,9 +93,10 @@ static inline bool subpool_is_free(struct hugepage_subpool
> *spool)
> return true;
> }
>
> -static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
> +static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
> + unsigned long irq_flags)
> {
> - spin_unlock(&spool->lock);
> + spin_unlock_irqrestore(&spool->lock, irq_flags);
>
> /* If no pages are used, and no other handles to the subpool
>* remain, give up any reservations based on minimum size and
> @@ -134,10 +135,12 @@ struct hugepage_subpool *hugepage_new_subpool(struct
> hstate *h, long max_hpages,
>
> void hugepage_put_subpool(struct hugepage_subpool *spool)
> {
> - spin_lock(&spool->lock);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&spool->lock, flags);
> BUG_ON(!spool->count);
> spool->count--;
> - unlock_or_release_subpool(spool);
> + unlock_or_release_subpool(spool, flags);
> }
>
> /*
> @@ -156,7 +159,7 @@ static long hugepage_subpool_get_pages(struct
> hugepage_subpool *spool,
> if (!spool)
> return ret;
>
> - spin_lock(&spool->lock);
> + spin_lock_irq(&spool->lock);
>
> if (spool->max_hpages != -1) { /* maximum size accounting */
> if ((spool->used_hpages + delta) <= spool->max_hpages)
> @@ -183,7 +186,7 @@ static long hugepage_subpool_get_pages(struct
> hugepage_subpool *spool,
> }
>
> unlock_ret:
> - spin_unlock(&spool->lock);
> + spin_unlock_irq(&spool->lock);
> return ret;
> }
>
> @@ -197,11 +200,12 @@ static long hugepage_subpool_put_pages(struct
> hugepage_subpool *spool,
> long delta)
> {
> long ret = delta;
> + unsigned long flags;
>
> if (!spool)
> return delta;
>
> - spin_lock(&spool->lock);
> + spin_lock_irqsave(&spool->lock, flags);
>
> if (spool->max_hpages != -1)/* maximum size accounting */
> spool->used_hpages -= delta;
> @@ -222,7 +226,7 @@ static long hugepage_subpool_put_pages(struct
> hugepage_subpool *spool,
>* If hugetlbfs_put_super couldn't free spool due to an outstanding
>* quota reference, free it now.
>*/
> - unlock_or_release_subpool(spool);
> + unlock_or_release_subpool(spool, flags);
>
> return ret;
> }
> @@ -1407,7 +1411,7 @@ struct hstate *size_to_hstate(unsigned long size)
> return NULL;
> }
>
> -static void __free_huge_page(struct page *page)
> +void free_huge_page(struct page *page)
> {
> /*
>* Can't pass hstate in here because it is called from the
> @@ -1417,6 +1421,7 @@ static void __free_huge_page(struct page *page)
> int nid = page_to_nid(page);
> struct hugepage_subpool *spool = hugetlb_page_subpool(page);
> bool restore_reserve;
> + unsigned long flags;
>
> VM_BUG_ON_PAGE(page_count(page), page);
>