Normally, when a mmapped file is accessed and a page fault is taken, the
file's (->mapping, ->index) pair is associated with the dax entry (which
represents one page or a couple of pages) to facilitate the reverse-mapping
search.  But in the case of reflink, one dax entry may be shared by multiple
files or offsets.  In order to establish the reverse-mapping relationship in
this case, introduce an rb-tree to track the multiple files and offsets.

The root of the rb-tree is stored in page->private, since that field does
not appear to be used in fsdax.  We create the rb-tree and insert the
(->mapping, ->index) tuple the second time a dax entry is associated,
which means the dax entry is shared.  The tuple is deleted from the
rb-tree when disassociating.

Signed-off-by: Shiyang Ruan <[email protected]>
---
 fs/dax.c            | 153 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/dax.h |   6 ++
 2 files changed, 147 insertions(+), 12 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 11b16729b86f..2f996c566103 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -25,6 +25,7 @@
 #include <linux/sizes.h>
 #include <linux/mmu_notifier.h>
 #include <linux/iomap.h>
+#include <linux/rbtree.h>
 #include <asm/pgalloc.h>
 
 #define CREATE_TRACE_POINTS
@@ -310,6 +311,120 @@ static unsigned long dax_entry_size(void *entry)
                return PAGE_SIZE;
 }
 
+static struct kmem_cache *dax_rmap_node_cachep;
+static struct kmem_cache *dax_rmap_root_cachep;
+
+static int __init init_dax_rmap_cache(void)
+{
+       dax_rmap_root_cachep = KMEM_CACHE(rb_root_cached, 
SLAB_PANIC|SLAB_ACCOUNT);
+       dax_rmap_node_cachep = KMEM_CACHE(shared_file, SLAB_PANIC|SLAB_ACCOUNT);
+       return 0;
+}
+fs_initcall(init_dax_rmap_cache);
+
+struct rb_root_cached *dax_create_rbroot(void)
+{
+       struct rb_root_cached *root = kmem_cache_alloc(dax_rmap_root_cachep,
+                                                      GFP_KERNEL);
+
+       memset(root, 0, sizeof(struct rb_root_cached));
+       return root;
+}
+
+static bool dax_rmap_insert(struct page *page, struct address_space *mapping,
+                           pgoff_t index)
+{
+       struct rb_root_cached *root = (struct rb_root_cached 
*)page_private(page);
+       struct rb_node **new, *parent = NULL;
+       struct shared_file *p;
+       bool leftmost = true;
+
+       if (!root) {
+               root = dax_create_rbroot();
+               set_page_private(page, (unsigned long)root);
+               dax_rmap_insert(page, page->mapping, page->index);
+       }
+       new = &root->rb_root.rb_node;
+       /* Figure out where to insert new node */
+       while (*new) {
+               struct shared_file *this = container_of(*new, struct 
shared_file, node);
+               long result = (long)mapping - (long)this->mapping;
+
+               if (result == 0)
+                       result = (long)index - (long)this->index;
+               parent = *new;
+               if (result < 0)
+                       new = &((*new)->rb_left);
+               else if (result > 0) {
+                       new = &((*new)->rb_right);
+                       leftmost = false;
+               } else
+                       return false;
+       }
+       p = kmem_cache_alloc(dax_rmap_node_cachep, GFP_KERNEL);
+       p->mapping = mapping;
+       p->index = index;
+
+       /* Add new node and rebalance tree. */
+       rb_link_node(&p->node, parent, new);
+       rb_insert_color_cached(&p->node, root, leftmost);
+
+       return true;
+}
+
+static struct shared_file *dax_rmap_search(struct page *page,
+                                          struct address_space *mapping,
+                                          pgoff_t index)
+{
+       struct rb_root_cached *root = (struct rb_root_cached 
*)page_private(page);
+       struct rb_node *node = root->rb_root.rb_node;
+
+       while (node) {
+               struct shared_file *this = container_of(node, struct 
shared_file, node);
+               long result = (long)mapping - (long)this->mapping;
+
+               if (result == 0)
+                       result = (long)index - (long)this->index;
+               if (result < 0)
+                       node = node->rb_left;
+               else if (result > 0)
+                       node = node->rb_right;
+               else
+                       return this;
+       }
+       return NULL;
+}
+
+static void dax_rmap_delete(struct page *page, struct address_space *mapping,
+                           pgoff_t index)
+{
+       struct rb_root_cached *root = (struct rb_root_cached 
*)page_private(page);
+       struct shared_file *this;
+
+       if (!root) {
+               page->mapping = NULL;
+               page->index = 0;
+               return;
+       }
+
+       this = dax_rmap_search(page, mapping, index);
+       rb_erase_cached(&this->node, root);
+       kmem_cache_free(dax_rmap_node_cachep, this);
+
+       if (!RB_EMPTY_ROOT(&root->rb_root)) {
+               if (page->mapping == mapping && page->index == index) {
+                       this = container_of(rb_first_cached(root), struct 
shared_file, node);
+                       page->mapping = this->mapping;
+                       page->index = this->index;
+               }
+       } else {
+               kmem_cache_free(dax_rmap_root_cachep, root);
+               set_page_private(page, 0);
+               page->mapping = NULL;
+               page->index = 0;
+       }
+}
+
/* Return one past the last pfn covered by the given dax entry. */
static unsigned long dax_end_pfn(void *entry)
{
	return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
}
@@ -341,16 +456,20 @@ static void dax_associate_entry(void *entry, struct 
address_space *mapping,
        for_each_mapped_pfn(entry, pfn) {
                struct page *page = pfn_to_page(pfn);
 
-               WARN_ON_ONCE(page->mapping);
-               page->mapping = mapping;
-               page->index = index + i++;
+               if (!page->mapping) {
+                       page->mapping = mapping;
+                       page->index = index + i++;
+               } else {
+                       dax_rmap_insert(page, mapping, index + i++);
+               }
        }
 }
 
 static void dax_disassociate_entry(void *entry, struct address_space *mapping,
-               bool trunc)
+                                  pgoff_t index, bool trunc)
 {
        unsigned long pfn;
+       int i = 0;
 
        if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
                return;
@@ -359,9 +478,19 @@ static void dax_disassociate_entry(void *entry, struct 
address_space *mapping,
                struct page *page = pfn_to_page(pfn);
 
                WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
-               WARN_ON_ONCE(page->mapping && page->mapping != mapping);
-               page->mapping = NULL;
-               page->index = 0;
+               WARN_ON_ONCE(!page->mapping);
+               dax_rmap_delete(page, mapping, index + i++);
+       }
+}
+
/*
 * Account for the removal of one dax entry from @mapping.
 *
 * Empty, zero and PMD entries drop the exceptional-entry count by one.
 * Otherwise the count drops by the entry's size in pages.  NOTE(review):
 * for a plain PTE entry dax_entry_size() is PAGE_SIZE, so the else
 * branch also subtracts exactly one — confirm the intended accounting
 * difference between the two branches.
 */
static void __dax_decrease_nrexceptional(void *entry,
					 struct address_space *mapping)
{
	if (dax_is_empty_entry(entry) || dax_is_zero_entry(entry) ||
	    dax_is_pmd_entry(entry)) {
		mapping->nrexceptional--;
	} else {
		mapping->nrexceptional -= PHYS_PFN(dax_entry_size(entry));
	}
}
 
@@ -522,10 +651,10 @@ static void *grab_mapping_entry(struct xa_state *xas,
                        xas_lock_irq(xas);
                }
 
-               dax_disassociate_entry(entry, mapping, false);
+               dax_disassociate_entry(entry, mapping, index, false);
                xas_store(xas, NULL);   /* undo the PMD join */
                dax_wake_entry(xas, entry, true);
-               mapping->nrexceptional--;
+               __dax_decrease_nrexceptional(entry, mapping);
                entry = NULL;
                xas_set(xas, index);
        }
@@ -642,9 +771,9 @@ static int __dax_invalidate_entry(struct address_space 
*mapping,
            (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY) ||
             xas_get_mark(&xas, PAGECACHE_TAG_TOWRITE)))
                goto out;
-       dax_disassociate_entry(entry, mapping, trunc);
+       dax_disassociate_entry(entry, mapping, index, trunc);
        xas_store(&xas, NULL);
-       mapping->nrexceptional--;
+       __dax_decrease_nrexceptional(entry, mapping);
        ret = 1;
 out:
        put_unlocked_entry(&xas, entry);
@@ -737,7 +866,7 @@ static void *dax_insert_entry(struct xa_state *xas,
        if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
                void *old;
 
-               dax_disassociate_entry(entry, mapping, false);
+               dax_disassociate_entry(entry, mapping, xas->xa_index, false);
                dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
                /*
                 * Only swap our new entry into the page cache if the current
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d7af5d243f24..1e2e81c701b6 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -39,6 +39,12 @@ struct dax_operations {
        int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
 };
 
/*
 * One (mapping, index) owner of a shared fsdax page.  Nodes live in the
 * per-page rb-tree hung off page->private, keyed by (mapping, index).
 */
struct shared_file {
	struct address_space *mapping;	/* owning file's address space */
	pgoff_t index;			/* page offset within that mapping */
	struct rb_node node;		/* link in the per-page rb-tree */
};
+
 extern struct attribute_group dax_attribute_group;
 
 #if IS_ENABLED(CONFIG_DAX)
-- 
2.26.2


_______________________________________________
Linux-nvdimm mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to