Re: [PATCH v1 03/11] mm/page_alloc: refactor memmap_init_zone_device() page init

2021-04-23 Thread Dan Williams
On Thu, Mar 25, 2021 at 4:10 PM Joao Martins  wrote:
>
> Move struct page init to a helper function __init_zone_device_page().

Same sentence addition suggestion from the last patch to make this
patch have some rationale for existing.

>
> Signed-off-by: Joao Martins 
> ---
>  mm/page_alloc.c | 74 +++--
>  1 file changed, 41 insertions(+), 33 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 43dd98446b0b..58974067bbd4 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6237,6 +6237,46 @@ void __meminit memmap_init_range(unsigned long size, 
> int nid, unsigned long zone
>  }
>
>  #ifdef CONFIG_ZONE_DEVICE
> +static void __ref __init_zone_device_page(struct page *page, unsigned long 
> pfn,
> + unsigned long zone_idx, int nid,
> + struct dev_pagemap *pgmap)
> +{
> +
> +   __init_single_page(page, pfn, zone_idx, nid);
> +
> +   /*
> +* Mark page reserved as it will need to wait for onlining
> +* phase for it to be fully associated with a zone.
> +*
> +* We can use the non-atomic __set_bit operation for setting
> +* the flag as we are still initializing the pages.
> +*/
> +   __SetPageReserved(page);
> +
> +   /*
> +* ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
> +* and zone_device_data.  It is a bug if a ZONE_DEVICE page is
> +* ever freed or placed on a driver-private list.
> +*/
> +   page->pgmap = pgmap;
> +   page->zone_device_data = NULL;
> +
> +   /*
> +* Mark the block movable so that blocks are reserved for
> +* movable at startup. This will force kernel allocations
> +* to reserve their blocks rather than leaking throughout
> +* the address space during boot when many long-lived
> +* kernel allocations are made.
> +*
> +* Please note that MEMINIT_HOTPLUG path doesn't clear memmap
> +* because this is done early in section_activate()
> +*/
> +   if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
> +   set_pageblock_migratetype(page, MIGRATE_MOVABLE);
> +   cond_resched();
> +   }
> +}
> +
>  void __ref memmap_init_zone_device(struct zone *zone,
>unsigned long start_pfn,
>unsigned long nr_pages,
> @@ -6265,39 +6305,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
> for (pfn = start_pfn; pfn < end_pfn; pfn++) {
> struct page *page = pfn_to_page(pfn);
>
> -   __init_single_page(page, pfn, zone_idx, nid);
> -
> -   /*
> -* Mark page reserved as it will need to wait for onlining
> -* phase for it to be fully associated with a zone.
> -*
> -* We can use the non-atomic __set_bit operation for setting
> -* the flag as we are still initializing the pages.
> -*/
> -   __SetPageReserved(page);
> -
> -   /*
> -* ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
> -* and zone_device_data.  It is a bug if a ZONE_DEVICE page is
> -* ever freed or placed on a driver-private list.
> -*/
> -   page->pgmap = pgmap;
> -   page->zone_device_data = NULL;
> -
> -   /*
> -* Mark the block movable so that blocks are reserved for
> -* movable at startup. This will force kernel allocations
> -* to reserve their blocks rather than leaking throughout
> -* the address space during boot when many long-lived
> -* kernel allocations are made.
> -*
> -* Please note that MEMINIT_HOTPLUG path doesn't clear memmap
> -* because this is done early in section_activate()
> -*/
> -   if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
> -   set_pageblock_migratetype(page, MIGRATE_MOVABLE);
> -   cond_resched();
> -   }
> +   __init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
> }
>
> pr_info("%s initialised %lu pages in %ums\n", __func__,
> --
> 2.17.1
>
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


Re: [PATCH v1 02/11] mm/page_alloc: split prep_compound_page into head and tail subparts

2021-04-23 Thread Dan Williams
On Thu, Mar 25, 2021 at 4:10 PM Joao Martins  wrote:
>
> Split the utility function prep_compound_page() into head and tail
> counterparts, and use them accordingly.

To make this patch stand alone better lets add another sentence:

"This is in preparation for sharing the storage for / deduplicating
compound page metadata."

Other than that, looks good to me.

>
> Signed-off-by: Joao Martins 
> ---
>  mm/page_alloc.c | 32 +---
>  1 file changed, 21 insertions(+), 11 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index c53fe4fa10bf..43dd98446b0b 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -692,24 +692,34 @@ void free_compound_page(struct page *page)
> __free_pages_ok(page, compound_order(page), FPI_NONE);
>  }
>
> +static void prep_compound_head(struct page *page, unsigned int order)
> +{
> +   set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
> +   set_compound_order(page, order);
> +   atomic_set(compound_mapcount_ptr(page), -1);
> +   if (hpage_pincount_available(page))
> +   atomic_set(compound_pincount_ptr(page), 0);
> +}
> +
> +static void prep_compound_tail(struct page *head, int tail_idx)
> +{
> +   struct page *p = head + tail_idx;
> +
> +   set_page_count(p, 0);
> +   p->mapping = TAIL_MAPPING;
> +   set_compound_head(p, head);
> +}
> +
>  void prep_compound_page(struct page *page, unsigned int order)
>  {
> int i;
> int nr_pages = 1 << order;
>
> __SetPageHead(page);
> -   for (i = 1; i < nr_pages; i++) {
> -   struct page *p = page + i;
> -   set_page_count(p, 0);
> -   p->mapping = TAIL_MAPPING;
> -   set_compound_head(p, page);
> -   }
> +   for (i = 1; i < nr_pages; i++)
> +   prep_compound_tail(page, i);
>
> -   set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
> -   set_compound_order(page, order);
> -   atomic_set(compound_mapcount_ptr(page), -1);
> -   if (hpage_pincount_available(page))
> -   atomic_set(compound_pincount_ptr(page), 0);
> +   prep_compound_head(page, order);
>  }
>
>  #ifdef CONFIG_DEBUG_PAGEALLOC
> --
> 2.17.1
>
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


Re: [PATCH v1 01/11] memory-failure: fetch compound_head after pgmap_pfn_valid()

2021-04-23 Thread Dan Williams
On Thu, Mar 25, 2021 at 4:10 PM Joao Martins  wrote:
>
> memory_failure_dev_pagemap() at the moment assumes base pages (e.g.
> dax_lock_page()).  For pagemap with compound pages fetch the
> compound_head in case we are handling a tail page memory failure.
>
> Currently this is a nop, but in the advent of compound pages in
> dev_pagemap it allows memory_failure_dev_pagemap() to keep working.
>
> Reported-by: Jane Chu 
> Signed-off-by: Joao Martins 
> ---
>  mm/memory-failure.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 24210c9bd843..94240d772623 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1318,6 +1318,8 @@ static int memory_failure_dev_pagemap(unsigned long 
> pfn, int flags,
> goto out;
> }
>
> +   page = compound_head(page);

Unless / until we do compound pages for the filesystem-dax case, I
would add a comment like:

/* pages instantiated by device-dax (not filesystem-dax) may be
compound pages */
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


OFFER

2021-04-23 Thread Maria
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


From Ms Maria Willasey

2021-04-23 Thread Maria
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


Re: [PATCH v4 0/3] dax: Fix missed wakeup in put_unlocked_entry()

2021-04-23 Thread Dan Williams
On Fri, Apr 23, 2021 at 6:07 AM Vivek Goyal  wrote:
>
> Hi,
>
> This is V4 of the patches. Posted V3 here.
>
> https://lore.kernel.org/linux-fsdevel/20210419213636.1514816-1-vgo...@redhat.com/
>
> Changes since V3 are.
>
> - Renamed "enum dax_entry_wake_mode" to "enum dax_wake_mode" (Matthew Wilcox)
> - Changed description of WAKE_NEXT and WAKE_ALL (Jan Kara)
> - Got rid of a comment (Greg Kurz)

Looks good Vivek, thanks for the resend.
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


New Business Invitation from Ibrahim Sadiq

2021-04-23 Thread LinkedIn


 LinkedIn  
 
 HI   
 We want to establish a strong business relationship with your company. Please 
contact me for more details about our order,
Kindly get back to me with your company brochure/catalogue.
 Ibrahim Sadiq
Sales Manager
 
 Accept Add MeView Business Profile
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


[PATCH v4 1/3] dax: Add an enum for specifying dax wakeup mode

2021-04-23 Thread Vivek Goyal
Dan mentioned that he is not very fond of passing around a boolean true/false
to specify if only the next waiter should be woken up or all waiters should be
woken up. He instead prefers that we introduce an enum and make it very
explicit at the callsite itself. This makes the code easier to read.

This patch should not introduce any change of behavior.

Reviewed-by: Greg Kurz 
Reviewed-by: Jan Kara 
Suggested-by: Dan Williams 
Signed-off-by: Vivek Goyal 
---
 fs/dax.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index b3d27fdc6775..4b1918b9ad97 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -144,6 +144,16 @@ struct wait_exceptional_entry_queue {
struct exceptional_entry_key key;
 };
 
+/**
+ * enum dax_wake_mode: waitqueue wakeup behaviour
+ * @WAKE_NEXT: wake only the first waiter in the waitqueue
+ * @WAKE_ALL: wake all waiters in the waitqueue
+ */
+enum dax_wake_mode {
+   WAKE_NEXT,
+   WAKE_ALL,
+};
+
 static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
void *entry, struct exceptional_entry_key *key)
 {
@@ -182,7 +192,8 @@ static int wake_exceptional_entry_func(wait_queue_entry_t 
*wait,
  * The important information it's conveying is whether the entry at
  * this index used to be a PMD entry.
  */
-static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+static void dax_wake_entry(struct xa_state *xas, void *entry,
+  enum dax_wake_mode mode)
 {
struct exceptional_entry_key key;
wait_queue_head_t *wq;
@@ -196,7 +207,7 @@ static void dax_wake_entry(struct xa_state *xas, void 
*entry, bool wake_all)
 * must be in the waitqueue and the following check will see them.
 */
if (waitqueue_active(wq))
-   __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, );
+   __wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, );
 }
 
 /*
@@ -268,7 +279,7 @@ static void put_unlocked_entry(struct xa_state *xas, void 
*entry)
 {
/* If we were the only waiter woken, wake the next one */
if (entry && !dax_is_conflict(entry))
-   dax_wake_entry(xas, entry, false);
+   dax_wake_entry(xas, entry, WAKE_NEXT);
 }
 
 /*
@@ -286,7 +297,7 @@ static void dax_unlock_entry(struct xa_state *xas, void 
*entry)
old = xas_store(xas, entry);
xas_unlock_irq(xas);
BUG_ON(!dax_is_locked(old));
-   dax_wake_entry(xas, entry, false);
+   dax_wake_entry(xas, entry, WAKE_NEXT);
 }
 
 /*
@@ -524,7 +535,7 @@ static void *grab_mapping_entry(struct xa_state *xas,
 
dax_disassociate_entry(entry, mapping, false);
xas_store(xas, NULL);   /* undo the PMD join */
-   dax_wake_entry(xas, entry, true);
+   dax_wake_entry(xas, entry, WAKE_ALL);
mapping->nrexceptional--;
entry = NULL;
xas_set(xas, index);
@@ -937,7 +948,7 @@ static int dax_writeback_one(struct xa_state *xas, struct 
dax_device *dax_dev,
xas_lock_irq(xas);
xas_store(xas, entry);
xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
-   dax_wake_entry(xas, entry, false);
+   dax_wake_entry(xas, entry, WAKE_NEXT);
 
trace_dax_writeback_one(mapping->host, index, count);
return ret;
-- 
2.25.4
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


[PATCH v4 3/3] dax: Wake up all waiters after invalidating dax entry

2021-04-23 Thread Vivek Goyal
I am seeing missed wakeups which ultimately lead to a deadlock when I am
using virtiofs with DAX enabled and running "make -j". I had to mount
virtiofs as rootfs and also reduce the dax window size to 256M to reproduce
the problem consistently.

So here is the problem. put_unlocked_entry() wakes up waiters only
if entry is not null as well as !dax_is_conflict(entry). But if I
call multiple instances of invalidate_inode_pages2() in parallel,
then I can run into a situation where there are waiters on
this index but nobody will wake these waiters.

invalidate_inode_pages2()
  invalidate_inode_pages2_range()
invalidate_exceptional_entry2()
  dax_invalidate_mapping_entry_sync()
__dax_invalidate_entry() {
xas_lock_irq();
entry = get_unlocked_entry(, 0);
...
...
dax_disassociate_entry(entry, mapping, trunc);
xas_store(, NULL);
...
...
put_unlocked_entry(, entry);
xas_unlock_irq();
}

Say a fault is in progress and it has locked the entry at offset say "0x1c".
Now say three instances of invalidate_inode_pages2() are in progress
(A, B, C) and they all try to invalidate the entry at offset "0x1c". Given
the dax entry is locked, all three instances A, B, C will wait in the wait queue.

When dax fault finishes, say A is woken up. It will store NULL entry
at index "0x1c" and wake up B. When B comes along it will find "entry=0"
at page offset 0x1c and it will call put_unlocked_entry(, 0). And
this means put_unlocked_entry() will not wake up next waiter, given
the current code. And that means C continues to wait and is not woken
up.

This patch fixes the issue by waking up all waiters when a dax entry
has been invalidated. This seems to fix the deadlock I am facing
and I can make forward progress.

Reported-by: Sergio Lopez 
Fixes: ac401cc78242 ("dax: New fault locking")
Reviewed-by: Jan Kara 
Suggested-by: Dan Williams 
Signed-off-by: Vivek Goyal 
---
 fs/dax.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dax.c b/fs/dax.c
index 96e896de8f18..83daa57d37d3 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space 
*mapping,
mapping->nrexceptional--;
ret = 1;
 out:
-   put_unlocked_entry(, entry, WAKE_NEXT);
+   put_unlocked_entry(, entry, WAKE_ALL);
xas_unlock_irq();
return ret;
 }
-- 
2.25.4
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


[PATCH v4 2/3] dax: Add a wakeup mode parameter to put_unlocked_entry()

2021-04-23 Thread Vivek Goyal
As of now put_unlocked_entry() always wakes up next waiter. In next
patches we want to wake up all waiters at one callsite. Hence, add a
parameter to the function.

This patch does not introduce any change of behavior.

Reviewed-by: Greg Kurz 
Reviewed-by: Jan Kara 
Suggested-by: Dan Williams 
Signed-off-by: Vivek Goyal 
---
 fs/dax.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 4b1918b9ad97..96e896de8f18 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -275,11 +275,11 @@ static void wait_entry_unlocked(struct xa_state *xas, 
void *entry)
finish_wait(wq, );
 }
 
-static void put_unlocked_entry(struct xa_state *xas, void *entry)
+static void put_unlocked_entry(struct xa_state *xas, void *entry,
+  enum dax_wake_mode mode)
 {
-   /* If we were the only waiter woken, wake the next one */
if (entry && !dax_is_conflict(entry))
-   dax_wake_entry(xas, entry, WAKE_NEXT);
+   dax_wake_entry(xas, entry, mode);
 }
 
 /*
@@ -633,7 +633,7 @@ struct page *dax_layout_busy_page_range(struct 
address_space *mapping,
entry = get_unlocked_entry(, 0);
if (entry)
page = dax_busy_page(entry);
-   put_unlocked_entry(, entry);
+   put_unlocked_entry(, entry, WAKE_NEXT);
if (page)
break;
if (++scanned % XA_CHECK_SCHED)
@@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space 
*mapping,
mapping->nrexceptional--;
ret = 1;
 out:
-   put_unlocked_entry(, entry);
+   put_unlocked_entry(, entry, WAKE_NEXT);
xas_unlock_irq();
return ret;
 }
@@ -954,7 +954,7 @@ static int dax_writeback_one(struct xa_state *xas, struct 
dax_device *dax_dev,
return ret;
 
  put_unlocked:
-   put_unlocked_entry(xas, entry);
+   put_unlocked_entry(xas, entry, WAKE_NEXT);
return ret;
 }
 
@@ -1695,7 +1695,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, 
unsigned int order)
/* Did we race with someone splitting entry or so? */
if (!entry || dax_is_conflict(entry) ||
(order == 0 && !dax_is_pte_entry(entry))) {
-   put_unlocked_entry(, entry);
+   put_unlocked_entry(, entry, WAKE_NEXT);
xas_unlock_irq();
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
  VM_FAULT_NOPAGE);
-- 
2.25.4
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


[PATCH v4 0/3] dax: Fix missed wakeup in put_unlocked_entry()

2021-04-23 Thread Vivek Goyal
Hi,

This is V4 of the patches. Posted V3 here.

https://lore.kernel.org/linux-fsdevel/20210419213636.1514816-1-vgo...@redhat.com/

Changes since V3 are.

- Renamed "enum dax_entry_wake_mode" to "enum dax_wake_mode" (Matthew Wilcox)
- Changed description of WAKE_NEXT and WAKE_ALL (Jan Kara) 
- Got rid of a comment (Greg Kurz)

Thanks
Vivek

Vivek Goyal (3):
  dax: Add an enum for specifying dax wakeup mode
  dax: Add a wakeup mode parameter to put_unlocked_entry()
  dax: Wake up all waiters after invalidating dax entry

 fs/dax.c | 35 +++
 1 file changed, 23 insertions(+), 12 deletions(-)

-- 
2.25.4
___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org


XH汉隆中港物流快运,安全,快速,直达,做您最信任的物流 选对事业可以成就一生,选对朋友可以智能一生,选对环境可以快乐一生,选对伴侣可以幸福一生,选对生活方式可以健康一生。

2021-04-23 Thread 向生
东莞汉隆国际物流有限公司(东莞分公司) 东莞地址:东莞市长安镇霄边大街南29号联兴商务大厦8楼 业务网络:香港|深圳|广州|佛山|东莞|厦门|义乌|台州|宁波|上海|青岛|天津|大连|福建|浙江 (中国内地均可安排上门提货)联络:向何 手机: 1 8 0 3 8 2 8 5 3 8 9 (同步微信)邮箱:xh130...@126.com QUOTATION OF PRICES FW:向生 TO: 出口�B专业�B简单 你好!回复您关于国内到香港买单出口和一般贸易出口费用,敬请参考! 出口流程: 国内提货―装车―报关―发车―香港卸货-香港装车送货上门 一、陆地口岸出口费用: (1).报关费 买单出口 60 单报关费 一般贸易报关费 200/单 (3).中港费  重货 0.7/KG 轻货150/CBM(口罩防疫类750/CBM) 大货价:重货 0.6/KG 轻货 140/CBM(4).香港入仓费 空运仓 300/单 海运仓 350/单 派送费 200/单偏远加 80 偏远费 备注:香港派送和入仓限 500/KG 和 3/CBM 超出费用按照 0.3/KG 或者 50/CBM 计费,入仓派 送停车费登记费实报实销 我司在,深圳,东莞,广州,上海,广州,宁波,温州设有仓库 香港仓库地址:新界葵涌货柜码头南路汇宝动力停车场 备注见附页 备注: 1.双方合作前,可签订出口代理协议。 2.此报价不含税。如须开票,需加 3%税点。=千里之行,始于足下。改变将来,从现在开始。改变现在,就是改变未来。人生就像掌纹,尽管错综复杂,却始终掌握在自己手中。积极者相信只有推动自己才能推动世界,只要推动自己就能推动世界。时间告诉你什么叫衰老,回忆告诉你什么叫幼稚。不要总在过去的回忆里缠绵,昨天的太阳,晒不干今天的衣裳。成功需要成本,时间也是一种成本,对时间的珍惜就是对成本的节约。___
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-le...@lists.01.org