Re: [PATCH v4 7/8] hugetlb: make free_huge_page irq safe

2021-04-07 Thread Oscar Salvador
On Wed, Apr 07, 2021 at 11:33:18AM +0200, Michal Hocko wrote:
> Yes. spin_unlock_irq() will enable interrupts unconditionally, which is
> certainly not what we want if the path is called with IRQs disabled by
> the caller.

I see, thanks for confirming.

-- 
Oscar Salvador
SUSE L3


Re: [PATCH v4 7/8] hugetlb: make free_huge_page irq safe

2021-04-07 Thread Michal Hocko
On Wed 07-04-21 11:12:37, Oscar Salvador wrote:
> On Mon, Apr 05, 2021 at 04:00:42PM -0700, Mike Kravetz wrote:
> > Commit c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in
> > non-task context") was added to address the issue of free_huge_page
> > being called from irq context.  That commit hands off free_huge_page
> > processing to a workqueue if !in_task.  However, this doesn't cover
> > all cases, as pointed out by the 0day bot lockdep report [1].
> > 
> > :  Possible interrupt unsafe locking scenario:
> > :
> > :        CPU0                    CPU1
> > :        ----                    ----
> > :   lock(hugetlb_lock);
> > :                                local_irq_disable();
> > :                                lock(slock-AF_INET);
> > :                                lock(hugetlb_lock);
> > :   <Interrupt>
> > :     lock(slock-AF_INET);
> > 
> > Shakeel later explained that this is very likely the TCP TX zerocopy
> > from hugetlb pages scenario, where the networking code drops the last
> > reference to a hugetlb page while IRQs are disabled.  The hugetlb
> > freeing path doesn't disable IRQs while holding hugetlb_lock, so a
> > lock dependency chain can lead to a deadlock.
> > 
> > This commit addresses the issue by doing the following:
> > - Make hugetlb_lock irq safe.  This is mostly a simple process of
> >   changing spin_*lock calls to spin_*lock_irq* calls.
> > - Make subpool lock irq safe in a similar manner.
> > - Revert the !in_task check and workqueue handoff.
> > 
> > [1] https://lore.kernel.org/linux-mm/f1c03b05bc43a...@google.com/
> > 
> > Signed-off-by: Mike Kravetz 
> > Acked-by: Michal Hocko 
> > Reviewed-by: Muchun Song 
> 
> So, spin_lock_irqsave()/spin_unlock_irqrestore() is to be used in places
> that might be called from IRQ context?

Yes. spin_unlock_irq() will enable interrupts unconditionally, which is
certainly not what we want if the path is called with IRQs disabled by
the caller.
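
To illustrate the difference (a minimal sketch with made-up names, not
code from the patch; demo_lock stands in for hugetlb_lock or the
subpool lock):

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);

	/*
	 * A path only ever entered from process context with IRQs enabled
	 * can use the _irq variants: unconditionally re-enabling IRQs on
	 * unlock is safe here.
	 */
	static void only_called_from_process_context(void)
	{
		spin_lock_irq(&demo_lock);
		/* ... update counters ... */
		spin_unlock_irq(&demo_lock);	/* IRQs forced back on */
	}

	/*
	 * A path that may be reached with IRQs already disabled by the
	 * caller (e.g. a final put_page() from networking code) must use
	 * _irqsave/_irqrestore so the caller's IRQ state is preserved
	 * instead of being unconditionally re-enabled.
	 */
	static void may_be_called_with_irqs_off(void)
	{
		unsigned long flags;

		spin_lock_irqsave(&demo_lock, flags);
		/* ... free the page ... */
		spin_unlock_irqrestore(&demo_lock, flags);
	}
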
-- 
Michal Hocko
SUSE Labs


Re: [PATCH v4 7/8] hugetlb: make free_huge_page irq safe

2021-04-07 Thread Oscar Salvador
On Mon, Apr 05, 2021 at 04:00:42PM -0700, Mike Kravetz wrote:
> Commit c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in
> non-task context") was added to address the issue of free_huge_page
> being called from irq context.  That commit hands off free_huge_page
> processing to a workqueue if !in_task.  However, this doesn't cover
> all cases, as pointed out by the 0day bot lockdep report [1].
> 
> :  Possible interrupt unsafe locking scenario:
> :
> :        CPU0                    CPU1
> :        ----                    ----
> :   lock(hugetlb_lock);
> :                                local_irq_disable();
> :                                lock(slock-AF_INET);
> :                                lock(hugetlb_lock);
> :   <Interrupt>
> :     lock(slock-AF_INET);
> 
> Shakeel later explained that this is very likely the TCP TX zerocopy
> from hugetlb pages scenario, where the networking code drops the last
> reference to a hugetlb page while IRQs are disabled.  The hugetlb
> freeing path doesn't disable IRQs while holding hugetlb_lock, so a
> lock dependency chain can lead to a deadlock.
> 
> This commit addresses the issue by doing the following:
> - Make hugetlb_lock irq safe.  This is mostly a simple process of
>   changing spin_*lock calls to spin_*lock_irq* calls.
> - Make subpool lock irq safe in a similar manner.
> - Revert the !in_task check and workqueue handoff.
> 
> [1] https://lore.kernel.org/linux-mm/f1c03b05bc43a...@google.com/
> 
> Signed-off-by: Mike Kravetz 
> Acked-by: Michal Hocko 
> Reviewed-by: Muchun Song 

So, spin_lock_irqsave()/spin_unlock_irqrestore() is to be used in places
that might be called from IRQ context?

Looks good to me:

Reviewed-by: Oscar Salvador 

> ---
>  mm/hugetlb.c| 183 +---
>  mm/hugetlb_cgroup.c |   8 +-
>  2 files changed, 74 insertions(+), 117 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 93a2a11b9376..15b6e7aadb52 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -93,9 +93,10 @@ static inline bool subpool_is_free(struct hugepage_subpool *spool)
>   return true;
>  }
>  
> -static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
> +static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
> + unsigned long irq_flags)
>  {
> -	spin_unlock(&spool->lock);
> +	spin_unlock_irqrestore(&spool->lock, irq_flags);
>  
>   /* If no pages are used, and no other handles to the subpool
>* remain, give up any reservations based on minimum size and
> @@ -134,10 +135,12 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
>  
>  void hugepage_put_subpool(struct hugepage_subpool *spool)
>  {
> -	spin_lock(&spool->lock);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&spool->lock, flags);
>   BUG_ON(!spool->count);
>   spool->count--;
> - unlock_or_release_subpool(spool);
> + unlock_or_release_subpool(spool, flags);
>  }
>  
>  /*
> @@ -156,7 +159,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
>   if (!spool)
>   return ret;
>  
> -	spin_lock(&spool->lock);
> +	spin_lock_irq(&spool->lock);
>  
>   if (spool->max_hpages != -1) {  /* maximum size accounting */
>   if ((spool->used_hpages + delta) <= spool->max_hpages)
> @@ -183,7 +186,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
>   }
>  
>  unlock_ret:
> -	spin_unlock(&spool->lock);
> +	spin_unlock_irq(&spool->lock);
>   return ret;
>  }
>  
> @@ -197,11 +200,12 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
>  				      long delta)
>  {
>   long ret = delta;
> + unsigned long flags;
>  
>   if (!spool)
>   return delta;
>  
> -	spin_lock(&spool->lock);
> +	spin_lock_irqsave(&spool->lock, flags);
>  
>   if (spool->max_hpages != -1)/* maximum size accounting */
>   spool->used_hpages -= delta;
> @@ -222,7 +226,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
>* If hugetlbfs_put_super couldn't free spool due to an outstanding
>* quota reference, free it now.
>*/
> - unlock_or_release_subpool(spool);
> + unlock_or_release_subpool(spool, flags);
>  
>   return ret;
>  }
> @@ -1407,7 +1411,7 @@ struct hstate *size_to_hstate(unsigned long size)
>   return NULL;
>  }
>  
> -static void __free_huge_page(struct page *page)
> +void free_huge_page(struct page *page)
>  {
>   /*
>* Can't pass hstate in here because it is called from the
> @@ -1417,6 +1421,7 @@ static void __free_huge_page(struct page *page)
>   int nid = page_to_nid(page);
>   struct hugepage_subpool *spool = hugetlb_page_subpool(page);
>   bool restore_reserve;
> + unsigned long flags;
>  
>   VM_BUG_ON_PAGE(page_count(page), page);
>   

[PATCH v4 7/8] hugetlb: make free_huge_page irq safe

2021-04-05 Thread Mike Kravetz
Commit c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in
non-task context") was added to address the issue of free_huge_page
being called from irq context.  That commit hands off free_huge_page
processing to a workqueue if !in_task.  However, this doesn't cover
all cases, as pointed out by the 0day bot lockdep report [1].

:  Possible interrupt unsafe locking scenario:
:
:        CPU0                    CPU1
:        ----                    ----
:   lock(hugetlb_lock);
:                                local_irq_disable();
:                                lock(slock-AF_INET);
:                                lock(hugetlb_lock);
:   <Interrupt>
:     lock(slock-AF_INET);

Shakeel later explained that this is very likely the TCP TX zerocopy
from hugetlb pages scenario, where the networking code drops the last
reference to a hugetlb page while IRQs are disabled.  The hugetlb
freeing path doesn't disable IRQs while holding hugetlb_lock, so a lock
dependency chain can lead to a deadlock.

This commit addresses the issue by doing the following:
- Make hugetlb_lock irq safe.  This is mostly a simple process of
  changing spin_*lock calls to spin_*lock_irq* calls.
- Make subpool lock irq safe in a similar manner.
- Revert the !in_task check and workqueue handoff.

[1] https://lore.kernel.org/linux-mm/f1c03b05bc43a...@google.com/
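
One subtlety worth spelling out (a simplified sketch with hypothetical
names, mirroring the hugepage_put_subpool()/unlock_or_release_subpool()
change in the diff below): when the function that takes the lock with
spin_lock_irqsave() is not the one that drops it, the saved flags must
be handed to the helper that performs the spin_unlock_irqrestore():

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);	/* stand-in for spool->lock */

	/*
	 * The helper drops the lock on behalf of the caller, so it needs
	 * the flags saved at lock time to restore the caller's IRQ state.
	 */
	static void unlock_and_maybe_release(unsigned long irq_flags)
	{
		spin_unlock_irqrestore(&demo_lock, irq_flags);
		/* ... release resources outside the lock ... */
	}

	static void put_ref(void)
	{
		unsigned long flags;

		spin_lock_irqsave(&demo_lock, flags);
		/* ... drop a reference, update counters ... */
		unlock_and_maybe_release(flags);
	}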

Signed-off-by: Mike Kravetz 
Acked-by: Michal Hocko 
Reviewed-by: Muchun Song 
---
 mm/hugetlb.c| 183 +---
 mm/hugetlb_cgroup.c |   8 +-
 2 files changed, 74 insertions(+), 117 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 93a2a11b9376..15b6e7aadb52 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -93,9 +93,10 @@ static inline bool subpool_is_free(struct hugepage_subpool *spool)
return true;
 }
 
-static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
+   unsigned long irq_flags)
 {
-	spin_unlock(&spool->lock);
+	spin_unlock_irqrestore(&spool->lock, irq_flags);
 
/* If no pages are used, and no other handles to the subpool
 * remain, give up any reservations based on minimum size and
@@ -134,10 +135,12 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
 
 void hugepage_put_subpool(struct hugepage_subpool *spool)
 {
-	spin_lock(&spool->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&spool->lock, flags);
BUG_ON(!spool->count);
spool->count--;
-   unlock_or_release_subpool(spool);
+   unlock_or_release_subpool(spool, flags);
 }
 
 /*
@@ -156,7 +159,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
if (!spool)
return ret;
 
-	spin_lock(&spool->lock);
+	spin_lock_irq(&spool->lock);
 
if (spool->max_hpages != -1) {  /* maximum size accounting */
if ((spool->used_hpages + delta) <= spool->max_hpages)
@@ -183,7 +186,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
}
 
 unlock_ret:
-	spin_unlock(&spool->lock);
+	spin_unlock_irq(&spool->lock);
return ret;
 }
 
@@ -197,11 +200,12 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
 				      long delta)
 {
long ret = delta;
+   unsigned long flags;
 
if (!spool)
return delta;
 
-	spin_lock(&spool->lock);
+	spin_lock_irqsave(&spool->lock, flags);
 
if (spool->max_hpages != -1)/* maximum size accounting */
spool->used_hpages -= delta;
@@ -222,7 +226,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
 * If hugetlbfs_put_super couldn't free spool due to an outstanding
 * quota reference, free it now.
 */
-   unlock_or_release_subpool(spool);
+   unlock_or_release_subpool(spool, flags);
 
return ret;
 }
@@ -1407,7 +1411,7 @@ struct hstate *size_to_hstate(unsigned long size)
return NULL;
 }
 
-static void __free_huge_page(struct page *page)
+void free_huge_page(struct page *page)
 {
/*
 * Can't pass hstate in here because it is called from the
@@ -1417,6 +1421,7 @@ static void __free_huge_page(struct page *page)
int nid = page_to_nid(page);
struct hugepage_subpool *spool = hugetlb_page_subpool(page);
bool restore_reserve;
+   unsigned long flags;
 
VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(page_mapcount(page), page);
@@ -1445,7 +1450,7 @@ static void __free_huge_page(struct page *page)
restore_reserve = true;
}
 
-	spin_lock(&hugetlb_lock);
+	spin_lock_irqsave(&hugetlb_lock, flags);
ClearHPageMigratable(page);
hugetlb_cgroup_uncharge_page(hstate_index(h),
 pages_per_huge_page(h), page);
@@ -1456,66