[PATCH v2 04/20] mm: introduce __get_dev_pagemap()

2015-10-09 Thread Dan Williams
There are several scenarios where we need to retrieve and update
metadata associated with a given devm_memremap_pages() mapping, and the
only lookup key available is a pfn in the range:

1/ We want to augment vmemmap_populate() (called via arch_add_memory())
   to allocate memmap storage from pre-allocated pages reserved by the
   device driver.  At vmemmap_alloc_block_buf() time it grabs device pages
   rather than page allocator pages.  This is in support of
   devm_memremap_pages() mappings where the memmap is too large to fit in
   main memory (i.e. large persistent memory devices).

2/ Taking a reference against the mapping when inserting device pages
   into the address_space radix of a given inode.  This facilitates
   unmap_mapping_range() and truncate_inode_pages() operations when the
   driver is tearing down the mapping.

3/ get_user_pages() operations on ZONE_DEVICE memory require taking a
   reference against the mapping so that the driver teardown path can
   revoke and drain usage of device pages.

Cc: Christoph Hellwig 
Cc: Dave Chinner 
Cc: Andrew Morton 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 include/linux/mm.h |   18 ++++++++++++++++++
 kernel/memremap.c  |   40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80001de019ba..30c3c8764649 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -717,6 +717,24 @@ static inline enum zone_type page_zonenum(const struct page *page)
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
 
+/**
+ * struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @dev: host device of the mapping for debug
+ */
+struct dev_pagemap {
+   /* TODO: vmem_altmap and percpu_ref count */
+   struct device *dev;
+};
+
+#ifdef CONFIG_ZONE_DEVICE
+struct dev_pagemap *__get_dev_pagemap(resource_size_t phys);
+#else
+static inline struct dev_pagemap *get_dev_pagemap(resource_size_t phys)
+{
+   return NULL;
+}
+#endif
+
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
 #endif
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 3218e8b1fc28..64bfd9fa93aa 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -10,6 +10,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  */
+#include <linux/rculist.h>
 #include <linux/device.h>
 #include <linux/types.h>
 #include <linux/io.h>
@@ -138,18 +139,52 @@ void devm_memunmap(struct device *dev, void *addr)
 EXPORT_SYMBOL(devm_memunmap);
 
 #ifdef CONFIG_ZONE_DEVICE
+static LIST_HEAD(ranges);
+static DEFINE_SPINLOCK(range_lock);
+
 struct page_map {
struct resource res;
+   struct dev_pagemap pgmap;
+   struct list_head list;
 };
 
+static void add_page_map(struct page_map *page_map)
+{
+   spin_lock(&range_lock);
+   list_add_rcu(&page_map->list, &ranges);
+   spin_unlock(&range_lock);
+}
+
+static void del_page_map(struct page_map *page_map)
+{
+   spin_lock(&range_lock);
+   list_del_rcu(&page_map->list);
+   spin_unlock(&range_lock);
+}
+
 static void devm_memremap_pages_release(struct device *dev, void *res)
 {
struct page_map *page_map = res;
 
+   del_page_map(page_map);
+
/* pages are dead and unused, undo the arch mapping */
arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
 }
 
+/* assumes rcu_read_lock() held at entry */
+struct dev_pagemap *__get_dev_pagemap(resource_size_t phys)
+{
+   struct page_map *page_map;
+
+   WARN_ON_ONCE(!rcu_read_lock_held());
+
+   list_for_each_entry_rcu(page_map, &ranges, list)
+   if (phys >= page_map->res.start && phys <= page_map->res.end)
+   return &page_map->pgmap;
+   return NULL;
+}
+
 void *devm_memremap_pages(struct device *dev, struct resource *res)
 {
int is_ram = region_intersects(res->start, resource_size(res),
@@ -173,12 +208,17 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
 
memcpy(&page_map->res, res, sizeof(*res));
 
+   page_map->pgmap.dev = dev;
+   INIT_LIST_HEAD(&page_map->list);
+   add_page_map(page_map);
+
nid = dev_to_node(dev);
if (nid < 0)
nid = numa_mem_id();
 
error = arch_add_memory(nid, res->start, resource_size(res), true);
if (error) {
+   del_page_map(page_map);
devres_free(page_map);
return ERR_PTR(error);
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 04/20] mm: introduce __get_dev_pagemap()

2015-10-09 Thread Dan Williams
There are several scenarios where we need to retrieve and update
metadata associated with a given devm_memremap_pages() mapping, and the
only lookup key available is a pfn in the range:

1/ We want to augment vmemmap_populate() (called via arch_add_memory())
   to allocate memmap storage from pre-allocated pages reserved by the
   device driver.  At vmemmap_alloc_block_buf() time it grabs device pages
   rather than page allocator pages.  This is in support of
   devm_memremap_pages() mappings where the memmap is too large to fit in
   main memory (i.e. large persistent memory devices).

2/ Taking a reference against the mapping when inserting device pages
   into the address_space radix of a given inode.  This facilitates
   unmap_mapping_range() and truncate_inode_pages() operations when the
   driver is tearing down the mapping.

3/ get_user_pages() operations on ZONE_DEVICE memory require taking a
   reference against the mapping so that the driver teardown path can
   revoke and drain usage of device pages.

Cc: Christoph Hellwig 
Cc: Dave Chinner 
Cc: Andrew Morton 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 include/linux/mm.h |   18 ++++++++++++++++++
 kernel/memremap.c  |   40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80001de019ba..30c3c8764649 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -717,6 +717,24 @@ static inline enum zone_type page_zonenum(const struct page *page)
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
 
+/**
+ * struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @dev: host device of the mapping for debug
+ */
+struct dev_pagemap {
+   /* TODO: vmem_altmap and percpu_ref count */
+   struct device *dev;
+};
+
+#ifdef CONFIG_ZONE_DEVICE
+struct dev_pagemap *__get_dev_pagemap(resource_size_t phys);
+#else
+static inline struct dev_pagemap *get_dev_pagemap(resource_size_t phys)
+{
+   return NULL;
+}
+#endif
+
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
 #endif
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 3218e8b1fc28..64bfd9fa93aa 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -10,6 +10,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  */
+#include <linux/rculist.h>
 #include <linux/device.h>
 #include <linux/types.h>
 #include <linux/io.h>
@@ -138,18 +139,52 @@ void devm_memunmap(struct device *dev, void *addr)
 EXPORT_SYMBOL(devm_memunmap);
 
 #ifdef CONFIG_ZONE_DEVICE
+static LIST_HEAD(ranges);
+static DEFINE_SPINLOCK(range_lock);
+
 struct page_map {
struct resource res;
+   struct dev_pagemap pgmap;
+   struct list_head list;
 };
 
+static void add_page_map(struct page_map *page_map)
+{
+   spin_lock(&range_lock);
+   list_add_rcu(&page_map->list, &ranges);
+   spin_unlock(&range_lock);
+}
+
+static void del_page_map(struct page_map *page_map)
+{
+   spin_lock(&range_lock);
+   list_del_rcu(&page_map->list);
+   spin_unlock(&range_lock);
+}
+
 static void devm_memremap_pages_release(struct device *dev, void *res)
 {
struct page_map *page_map = res;
 
+   del_page_map(page_map);
+
/* pages are dead and unused, undo the arch mapping */
arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
 }
 
+/* assumes rcu_read_lock() held at entry */
+struct dev_pagemap *__get_dev_pagemap(resource_size_t phys)
+{
+   struct page_map *page_map;
+
+   WARN_ON_ONCE(!rcu_read_lock_held());
+
+   list_for_each_entry_rcu(page_map, &ranges, list)
+   if (phys >= page_map->res.start && phys <= page_map->res.end)
+   return &page_map->pgmap;
+   return NULL;
+}
+
 void *devm_memremap_pages(struct device *dev, struct resource *res)
 {
int is_ram = region_intersects(res->start, resource_size(res),
@@ -173,12 +208,17 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
 
memcpy(&page_map->res, res, sizeof(*res));
 
+   page_map->pgmap.dev = dev;
+   INIT_LIST_HEAD(&page_map->list);
+   add_page_map(page_map);
+
nid = dev_to_node(dev);
if (nid < 0)
nid = numa_mem_id();
 
error = arch_add_memory(nid, res->start, resource_size(res), true);
if (error) {
+   del_page_map(page_map);
devres_free(page_map);
return ERR_PTR(error);
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/