[PATCH v2 13/20] mm, dax, pmem: introduce pfn_t

2015-10-09 Thread Dan Williams
In preparation for enabling get_user_pages() operations on dax mappings,
introduce a type that encapsulates a page-frame-number that can also be
used to encode other information.  This other information is the
historical "page_link" encoding in a scatterlist, but can also denote
"device memory", where "device memory" is a set of pfns that are not
part of the kernel's linear mapping by default, but are accessed via the
same memory controller as ram.  The motivation for this new type is
large capacity persistent memory that optionally has struct page entries
in the 'memmap'.

When a driver, like pmem, has established a devm_memremap_pages()
mapping, it needs to communicate to upper layers that the pfn has a page
backing.  This property will be leveraged in a later patch to enable
dax-gup.  For now, update all the ->direct_access() implementations to
communicate whether the returned pfn range is mapped.

Cc: Christoph Hellwig 
Cc: Dave Hansen 
Cc: Andrew Morton 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 arch/powerpc/sysdev/axonram.c |8 ++---
 drivers/block/brd.c   |4 +--
 drivers/nvdimm/pmem.c |   25 ++--
 drivers/s390/block/dcssblk.c  |   10 +++---
 fs/block_dev.c|2 +
 fs/dax.c  |   19 ++--
 include/linux/blkdev.h|4 +--
 include/linux/mm.h|   65 +
 include/linux/pfn.h   |9 ++
 9 files changed, 105 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index d2b79bc336c1..59ca4c0ab529 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -141,15 +141,13 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-  void __pmem **kaddr, unsigned long *pfn)
+  void __pmem **kaddr, pfn_t *pfn)
 {
struct axon_ram_bank *bank = device->bd_disk->private_data;
loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
-   void *addr = (void *)(bank->ph_addr + offset);
-
-   *kaddr = (void __pmem *)addr;
-   *pfn = virt_to_phys(addr) >> PAGE_SHIFT;
 
+   *kaddr = (void __pmem __force *) bank->io_addr + offset;
+   *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
return bank->size - offset;
 }
 
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index b9794aeeb878..0bbc60463779 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -374,7 +374,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-   void __pmem **kaddr, unsigned long *pfn)
+   void __pmem **kaddr, pfn_t *pfn)
 {
struct brd_device *brd = bdev->bd_disk->private_data;
struct page *page;
@@ -385,7 +385,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
if (!page)
return -ENOSPC;
*kaddr = (void __pmem *)page_address(page);
-   *pfn = page_to_pfn(page);
+   *pfn = page_to_pfn_t(page);
 
return PAGE_SIZE;
 }
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index bb66158c0505..c950602bbf0b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -39,6 +39,7 @@ struct pmem_device {
phys_addr_t phys_addr;
/* when non-zero this device is hosting a 'pfn' instance */
phys_addr_t data_offset;
+   unsigned long   pfn_flags;
void __pmem *virt_addr;
size_t  size;
 };
@@ -100,26 +101,15 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 }
 
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, unsigned long *pfn)
+ void __pmem **kaddr, pfn_t *pfn)
 {
struct pmem_device *pmem = bdev->bd_disk->private_data;
resource_size_t offset = sector * 512 + pmem->data_offset;
-   resource_size_t size;
-
-   if (pmem->data_offset) {
-   /*
-* Limit the direct_access() size to what is covered by
-* the memmap
-*/
-   size = (pmem->size - offset) & ~ND_PFN_MASK;
-   } else
-   size = pmem->size - offset;
 
-   /* FIXME convert DAX to comprehend that this mapping has a lifetime */
*kaddr = pmem->virt_addr + offset;
-   *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
+   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
-   return size;
+   return pmem->size - offset;
 }
 
 static const struct block_device_operations pmem_fops = {
@@ -150,10 +140,12 @@ static struct pmem_device *pmem_alloc(struct device 

[PATCH v2 13/20] mm, dax, pmem: introduce pfn_t

2015-10-09 Thread Dan Williams
In preparation for enabling get_user_pages() operations on dax mappings,
introduce a type that encapsulates a page-frame-number that can also be
used to encode other information.  This other information is the
historical "page_link" encoding in a scatterlist, but can also denote
"device memory", where "device memory" is a set of pfns that are not
part of the kernel's linear mapping by default, but are accessed via the
same memory controller as ram.  The motivation for this new type is
large capacity persistent memory that optionally has struct page entries
in the 'memmap'.

When a driver, like pmem, has established a devm_memremap_pages()
mapping, it needs to communicate to upper layers that the pfn has a page
backing.  This property will be leveraged in a later patch to enable
dax-gup.  For now, update all the ->direct_access() implementations to
communicate whether the returned pfn range is mapped.

Cc: Christoph Hellwig 
Cc: Dave Hansen 
Cc: Andrew Morton 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 arch/powerpc/sysdev/axonram.c |8 ++---
 drivers/block/brd.c   |4 +--
 drivers/nvdimm/pmem.c |   25 ++--
 drivers/s390/block/dcssblk.c  |   10 +++---
 fs/block_dev.c|2 +
 fs/dax.c  |   19 ++--
 include/linux/blkdev.h|4 +--
 include/linux/mm.h|   65 +
 include/linux/pfn.h   |9 ++
 9 files changed, 105 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index d2b79bc336c1..59ca4c0ab529 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -141,15 +141,13 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-  void __pmem **kaddr, unsigned long *pfn)
+  void __pmem **kaddr, pfn_t *pfn)
 {
struct axon_ram_bank *bank = device->bd_disk->private_data;
loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
-   void *addr = (void *)(bank->ph_addr + offset);
-
-   *kaddr = (void __pmem *)addr;
-   *pfn = virt_to_phys(addr) >> PAGE_SHIFT;
 
+   *kaddr = (void __pmem __force *) bank->io_addr + offset;
+   *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
return bank->size - offset;
 }
 
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index b9794aeeb878..0bbc60463779 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -374,7 +374,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-   void __pmem **kaddr, unsigned long *pfn)
+   void __pmem **kaddr, pfn_t *pfn)
 {
struct brd_device *brd = bdev->bd_disk->private_data;
struct page *page;
@@ -385,7 +385,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
if (!page)
return -ENOSPC;
*kaddr = (void __pmem *)page_address(page);
-   *pfn = page_to_pfn(page);
+   *pfn = page_to_pfn_t(page);
 
return PAGE_SIZE;
 }
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index bb66158c0505..c950602bbf0b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -39,6 +39,7 @@ struct pmem_device {
phys_addr_t phys_addr;
/* when non-zero this device is hosting a 'pfn' instance */
phys_addr_t data_offset;
+   unsigned long   pfn_flags;
void __pmem *virt_addr;
size_t  size;
 };
@@ -100,26 +101,15 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 }
 
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, unsigned long *pfn)
+ void __pmem **kaddr, pfn_t *pfn)
 {
struct pmem_device *pmem = bdev->bd_disk->private_data;
resource_size_t offset = sector * 512 + pmem->data_offset;
-   resource_size_t size;
-
-   if (pmem->data_offset) {
-   /*
-* Limit the direct_access() size to what is covered by
-* the memmap
-*/
-   size = (pmem->size - offset) & ~ND_PFN_MASK;
-   } else
-   size = pmem->size - offset;
 
-   /* FIXME convert DAX to comprehend that this mapping has a lifetime */
*kaddr = pmem->virt_addr + offset;
-   *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
+   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
-   return size;
+   return pmem->size - offset;
 }
 
 static const struct