In the case of CoW, replace the existing entry in the mapping with the
newly allocated one.  Also mark the entry as PAGECACHE_TAG_TOWRITE so
that writeback write-protects this entry.  This helps with snapshots:
new write page faults after a snapshot trigger a CoW.

Signed-off-by: Goldwyn Rodrigues <rgold...@suse.com>
Signed-off-by: Shiyang Ruan <ruansy.f...@fujitsu.com>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Reviewed-by: Ritesh Harjani <rite...@linux.ibm.com>
Reviewed-by: Darrick J. Wong <djw...@kernel.org>
---
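Note: below is a standalone sketch (not part of the patch, and not
kernel code) of the classification the two new helpers perform: a write
fault on an extent the filesystem reported as IOMAP_F_SHARED is treated
as CoW, while a MAP_SYNC write fault on an IOMAP_F_DIRTY extent is
synchronous.  The flag values are illustrative stand-ins, not the
kernel's definitions:

	#include <stdbool.h>
	#include <stdio.h>

	/* Illustrative flag values; the kernel defines its own. */
	#define IOMAP_WRITE	(1u << 0)	/* fault is a write */
	#define IOMAP_F_SHARED	(1u << 1)	/* extent shared, e.g. after a snapshot */
	#define IOMAP_F_DIRTY	(1u << 2)	/* uncommitted metadata, fsync needed */
	#define VM_SYNC		(1u << 3)	/* vma was mmap'ed with MAP_SYNC */

	/* Mirrors dax_fault_is_cow(): a write fault on a shared extent. */
	static bool fault_is_cow(unsigned iter_flags, unsigned iomap_flags)
	{
		return (iter_flags & IOMAP_WRITE) &&
		       (iomap_flags & IOMAP_F_SHARED);
	}

	/* Mirrors dax_fault_is_synchronous(): a MAP_SYNC write fault with
	 * dirty metadata.  Such a fault is not marked dirty; the pfn is
	 * handed back so the caller can fsync first. */
	static bool fault_is_synchronous(unsigned iter_flags,
					 unsigned iomap_flags,
					 unsigned vm_flags)
	{
		return (iter_flags & IOMAP_WRITE) && (vm_flags & VM_SYNC) &&
		       (iomap_flags & IOMAP_F_DIRTY);
	}

	int main(void)
	{
		/* Post-snapshot write fault on a shared extent: CoW, and
		 * the new entry gets tagged PAGECACHE_TAG_TOWRITE. */
		printf("cow:  %d\n",
		       fault_is_cow(IOMAP_WRITE, IOMAP_F_SHARED));
		/* MAP_SYNC write fault with dirty metadata: synchronous. */
		printf("sync: %d\n",
		       fault_is_synchronous(IOMAP_WRITE, IOMAP_F_DIRTY,
					    VM_SYNC));
		return 0;
	}
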
 fs/dax.c | 77 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 42 insertions(+), 35 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 00d2cb72ec58..78e26204697b 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -828,6 +828,23 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter
        return 0;
 }
 
+/*
+ * MAP_SYNC on a dax mapping guarantees dirty metadata is
+ * flushed on write-faults (non-cow), but not read-faults.
+ */
+static bool dax_fault_is_synchronous(const struct iomap_iter *iter,
+               struct vm_area_struct *vma)
+{
+       return (iter->flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) &&
+               (iter->iomap.flags & IOMAP_F_DIRTY);
+}
+
+static bool dax_fault_is_cow(const struct iomap_iter *iter)
+{
+       return (iter->flags & IOMAP_WRITE) &&
+               (iter->iomap.flags & IOMAP_F_SHARED);
+}
+
 /*
  * By this point grab_mapping_entry() has ensured that we have a locked entry
  * of the appropriate size so we don't have to worry about downgrading PMDs to
@@ -835,16 +852,19 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter
  * already in the tree, we will skip the insertion and just dirty the PMD as
  * appropriate.
  */
-static void *dax_insert_entry(struct xa_state *xas,
-               struct address_space *mapping, struct vm_fault *vmf,
-               void *entry, pfn_t pfn, unsigned long flags, bool dirty)
+static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
+               const struct iomap_iter *iter, void *entry, pfn_t pfn,
+               unsigned long flags)
 {
+       struct address_space *mapping = vmf->vma->vm_file->f_mapping;
        void *new_entry = dax_make_entry(pfn, flags);
+       bool dirty = !dax_fault_is_synchronous(iter, vmf->vma);
+       bool cow = dax_fault_is_cow(iter);
 
        if (dirty)
                __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-       if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
+       if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
                unsigned long index = xas->xa_index;
                /* we are replacing a zero page with block mapping */
                if (dax_is_pmd_entry(entry))
@@ -856,12 +876,12 @@ static void *dax_insert_entry(struct xa_state *xas,
 
        xas_reset(xas);
        xas_lock_irq(xas);
-       if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+       if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
                void *old;
 
                dax_disassociate_entry(entry, mapping, false);
                dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address,
-                               false);
+                               cow);
                /*
                 * Only swap our new entry into the page cache if the current
                 * entry is a zero page or an empty entry.  If a normal PTE or
@@ -881,6 +901,9 @@ static void *dax_insert_entry(struct xa_state *xas,
        if (dirty)
                xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
 
+       if (cow)
+               xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
+
        xas_unlock_irq(xas);
        return entry;
 }
@@ -1122,17 +1145,15 @@ static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size,
  * If this page is ever written to we will re-fault and change the mapping to
  * point to real DAX storage instead.
  */
-static vm_fault_t dax_load_hole(struct xa_state *xas,
-               struct address_space *mapping, void **entry,
-               struct vm_fault *vmf)
+static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf,
+               const struct iomap_iter *iter, void **entry)
 {
-       struct inode *inode = mapping->host;
+       struct inode *inode = iter->inode;
        unsigned long vaddr = vmf->address;
        pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
        vm_fault_t ret;
 
-       *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
-                       DAX_ZERO_PAGE, false);
+       *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE);
 
        ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
        trace_dax_load_hole(inode, vmf, ret);
@@ -1141,7 +1162,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
 
 #ifdef CONFIG_FS_DAX_PMD
 static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
-               const struct iomap *iomap, void **entry)
+               const struct iomap_iter *iter, void **entry)
 {
        struct address_space *mapping = vmf->vma->vm_file->f_mapping;
        unsigned long pmd_addr = vmf->address & PMD_MASK;
@@ -1159,8 +1180,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
                goto fallback;
 
        pfn = page_to_pfn_t(zero_page);
-       *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
-                       DAX_PMD | DAX_ZERO_PAGE, false);
+       *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn,
+                                 DAX_PMD | DAX_ZERO_PAGE);
 
        if (arch_needs_pgtable_deposit()) {
                pgtable = pte_alloc_one(vma->vm_mm);
@@ -1193,7 +1214,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 }
 #else
 static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
-               const struct iomap *iomap, void **entry)
+               const struct iomap_iter *iter, void **entry)
 {
        return VM_FAULT_FALLBACK;
 }
@@ -1427,17 +1448,6 @@ static vm_fault_t dax_fault_return(int error)
        return vmf_error(error);
 }
 
-/*
- * MAP_SYNC on a dax mapping guarantees dirty metadata is
- * flushed on write-faults (non-cow), but not read-faults.
- */
-static bool dax_fault_is_synchronous(unsigned long flags,
-               struct vm_area_struct *vma, const struct iomap *iomap)
-{
-       return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
-               && (iomap->flags & IOMAP_F_DIRTY);
-}
-
 /*
  * When handling a synchronous page fault and the inode need a fsync, we can
  * insert the PTE/PMD into page tables only after that fsync happened. Skip
@@ -1495,13 +1505,11 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
                const struct iomap_iter *iter, pfn_t *pfnp,
                struct xa_state *xas, void **entry, bool pmd)
 {
-       struct address_space *mapping = vmf->vma->vm_file->f_mapping;
        const struct iomap *iomap = &iter->iomap;
        const struct iomap *srcmap = &iter->srcmap;
        size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
        loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
-       bool write = vmf->flags & FAULT_FLAG_WRITE;
-       bool sync = dax_fault_is_synchronous(iter->flags, vmf->vma, iomap);
+       bool write = iter->flags & IOMAP_WRITE;
        unsigned long entry_flags = pmd ? DAX_PMD : 0;
        int err = 0;
        pfn_t pfn;
@@ -1514,8 +1522,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
        if (!write &&
            (iomap->type == IOMAP_UNWRITTEN || iomap->type == IOMAP_HOLE)) {
                if (!pmd)
-                       return dax_load_hole(xas, mapping, entry, vmf);
-               return dax_pmd_load_hole(xas, vmf, iomap, entry);
+                       return dax_load_hole(xas, vmf, iter, entry);
+               return dax_pmd_load_hole(xas, vmf, iter, entry);
        }
 
        if (iomap->type != IOMAP_MAPPED && !(iomap->flags & IOMAP_F_SHARED)) {
@@ -1527,8 +1535,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
        if (err)
                return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err);
 
-       *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags,
-                                 write && !sync);
+       *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags);
 
        if (write &&
            srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
@@ -1537,7 +1544,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
                        return dax_fault_return(err);
        }
 
-       if (sync)
+       if (dax_fault_is_synchronous(iter, vmf->vma))
                return dax_fault_synchronous_pfnp(pfnp, pfn);
 
        /* insert PMD pfn */
-- 
2.35.1