The current memory_failure_dev_pagemap() can only handle single-mapped
dax page for fsdax mode. The dax page could be mapped by multiple files
and offsets if we let reflink feature & fsdax mode work together. So,
we refactor current implementation to support handle memory failure on
each file and offset.
Signed-off-by: Shiyang Ruan
---
fs/dax.c| 24 +-
include/linux/dax.h | 5 +-
include/linux/mm.h | 9
mm/memory-failure.c | 112 +---
4 files changed, 108 insertions(+), 42 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 5b47834f2e1b..1c17c4605bc0 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -379,14 +379,17 @@ static struct page *dax_busy_page(void *entry)
}
/*
- * dax_lock_mapping_entry - Lock the DAX entry corresponding to a page
- * @page: The page whose entry we want to lock
+ * dax_lock - Lock the DAX entry corresponding to a page
+ * @mapping: The file whose entry we want to lock
+ * @index: The offset where the DAX entry located in
+ * @pfnp:Page frame number of the locked entry
*
* Context: Process context.
* Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
* not be locked.
*/
-dax_entry_t dax_lock_page(struct page *page)
+dax_entry_t dax_lock(struct address_space *mapping, unsigned long index,
+ unsigned long *pfnp)
{
XA_STATE(xas, NULL, 0);
void *entry;
@@ -394,8 +397,6 @@ dax_entry_t dax_lock_page(struct page *page)
/* Ensure page->mapping isn't freed while we look at it */
rcu_read_lock();
for (;;) {
- struct address_space *mapping = READ_ONCE(page->mapping);
-
entry = NULL;
if (!mapping || !dax_mapping(mapping))
break;
@@ -413,11 +414,7 @@ dax_entry_t dax_lock_page(struct page *page)
xas.xa = &mapping->i_pages;
xas_lock_irq(&xas);
- if (mapping != page->mapping) {
- xas_unlock_irq(&xas);
- continue;
- }
- xas_set(&xas, page->index);
+ xas_set(&xas, index);
entry = xas_load(&xas);
if (dax_is_locked(entry)) {
rcu_read_unlock();
@@ -427,16 +424,17 @@ dax_entry_t dax_lock_page(struct page *page)
}
dax_lock_entry(&xas, entry);
xas_unlock_irq(&xas);
+ *pfnp = dax_to_pfn(entry);
break;
}
rcu_read_unlock();
return (dax_entry_t)entry;
}
-void dax_unlock_page(struct page *page, dax_entry_t cookie)
+void dax_unlock(struct address_space *mapping, unsigned long index,
+ dax_entry_t cookie)
{
- struct address_space *mapping = page->mapping;
- XA_STATE(xas, &mapping->i_pages, page->index);
+ XA_STATE(xas, &mapping->i_pages, index);
if (S_ISCHR(mapping->host->i_mode))
return;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index b52f084aa643..b24675af1de8 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -150,8 +150,9 @@ int dax_writeback_mapping_range(struct address_space
*mapping,
struct page *dax_layout_busy_page(struct address_space *mapping);
struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t
start, loff_t end);
-dax_entry_t dax_lock_page(struct page *page);
-void dax_unlock_page(struct page *page, dax_entry_t cookie);
+dax_entry_t dax_lock(struct address_space *mapping, unsigned long index,
unsigned long *pfnp);
+void dax_unlock(struct address_space *mapping, unsigned long index,
+ dax_entry_t cookie);
#else
static inline bool bdev_dax_supported(struct block_device *bdev,
int blocksize)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db6ae4d3fb4e..db3059a1853e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1141,6 +1141,14 @@ static inline bool is_device_private_page(const struct
page *page)
page->pgmap->type == MEMORY_DEVICE_PRIVATE;
}
+static inline bool is_device_fsdax_page(const struct page *page)
+{
+ return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
+ IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
+ is_zone_device_page(page) &&
+ page->pgmap->type == MEMORY_DEVICE_FS_DAX;
+}
+
static inline bool is_pci_p2pdma_page(const struct page *page)
{
return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
@@ -3030,6 +3038,7 @@ enum mf_flags {
MF_MUST_KILL = 1 << 2,
MF_SOFT_OFFLINE = 1 << 3,
};
+extern int mf_dax_mapping_kill_procs(struct address_space *mapping, pgoff_t
index, int flags);
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5d880d4eb9a2..03a4f4c1b803 10064