Re: [Linux-nvdimm] [PATCH v2 08/10] x86: support kmap_atomic_pfn_t() for persistent memory

2015-05-06 Thread Dan Williams
On Wed, May 6, 2015 at 1:05 PM, Dan Williams <dan.j.willi...@intel.com> wrote:
> It would be unfortunate if the kmap infrastructure escaped its current
> 32-bit/HIGHMEM bonds and leaked into 64-bit code.  Instead, if the user
> has enabled CONFIG_PMEM_IO we direct the kmap_atomic_pfn_t()
> implementation to scan a list of pre-mapped persistent memory address
> ranges inserted by the pmem driver.
>
> The __pfn_t to resource lookup is indeed inefficient walking of a linked list,
> but there are two mitigating factors:
>
> 1/ The number of persistent memory ranges is bounded by the number of
>    DIMMs which is on the order of 10s of DIMMs, not hundreds.
>
> 2/ The lookup yields the entire range, if it becomes inefficient to do a
>    kmap_atomic_pfn_t() a PAGE_SIZE at a time the caller can take
>    advantage of the fact that the lookup can be amortized for all kmap
>    operations it needs to perform in a given range.
>
> Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
> ---
>  arch/Kconfig |3 +
>  arch/x86/Kconfig |2 +
>  arch/x86/kernel/Makefile |1
>  arch/x86/kernel/kmap.c   |   95 
> ++
>  drivers/block/pmem.c |6 +++
>  include/linux/highmem.h  |   23 +++
>  6 files changed, 130 insertions(+)
>  create mode 100644 arch/x86/kernel/kmap.c
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index f7f800860c00..69d3a3fa21af 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -206,6 +206,9 @@ config HAVE_DMA_CONTIGUOUS
>  config HAVE_DMA_PFN
> bool
>
> +config HAVE_KMAP_PFN
> +   bool
> +
>  config GENERIC_SMP_IDLE_THREAD
> bool
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 1fae5e842423..eddaea839500 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -1434,7 +1434,9 @@ config X86_PMEM_LEGACY
>   Say Y if unsure.
>
>  config X86_PMEM_DMA
> +   depends on !HIGHMEM
> def_bool PMEM_IO
> +   select HAVE_KMAP_PFN
> select HAVE_DMA_PFN
>
>  config HIGHPTE
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 9bcd0b56ca17..44c323342996 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -96,6 +96,7 @@ obj-$(CONFIG_PARAVIRT)+= paravirt.o paravirt_patch_$(BITS).o
>  obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
>  obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
>  obj-$(CONFIG_X86_PMEM_LEGACY)  += pmem.o
> +obj-$(CONFIG_X86_PMEM_DMA) += kmap.o
>
>  obj-$(CONFIG_PCSPKR_PLATFORM)  += pcspeaker.o
>
> diff --git a/arch/x86/kernel/kmap.c b/arch/x86/kernel/kmap.c
> new file mode 100644
> index 000000000000..d597c475377b
> --- /dev/null
> +++ b/arch/x86/kernel/kmap.c
> @@ -0,0 +1,95 @@
> +/*
> + * Copyright(c) 2015 Intel Corporation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of version 2 of the GNU General Public License as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
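> +#include <linux/rcupdate.h>
> +#include <linux/rculist.h>
> +#include <linux/highmem.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <linux/mm.h>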
> +
> +static LIST_HEAD(ranges);
> +
> +struct kmap {
> +   struct list_head list;
> +   struct resource *res;
> +   struct device *dev;
> +   void *base;
> +};
> +
> +static void teardown_kmap(void *data)
> +{
> +   struct kmap *kmap = data;
> +
> +   dev_dbg(kmap->dev, "kmap unregister %pr\n", kmap->res);
> +   list_del_rcu(&kmap->list);
> +   synchronize_rcu();
> +   kfree(kmap);
> +}
> +
> +int devm_register_kmap_pfn_range(struct device *dev, struct resource *res,
> +   void *base)
> +{
> +   struct kmap *kmap = kzalloc(sizeof(*kmap), GFP_KERNEL);
> +   int rc;
> +
> +   if (!kmap)
> +   return -ENOMEM;
> +
> +   INIT_LIST_HEAD(&kmap->list);
> +   kmap->res = res;
> +   kmap->base = base;
> +   kmap->dev = dev;
> +   rc = devm_add_action(dev, teardown_kmap, kmap);
> +   if (rc) {
> +   kfree(kmap);
> +   return rc;
> +   }
> +   dev_dbg(kmap->dev, "kmap register %pr\n", kmap->res);
> +   list_add_rcu(&kmap->list, &ranges);
> +   return 0;
> +}
> +EXPORT_SYMBOL_GPL(devm_register_kmap_pfn_range);
> +
> +void *kmap_atomic_pfn_t(__pfn_t pfn)
> +{
> +   struct page *page = __pfn_t_to_page(pfn);
> +   resource_size_t addr;
> +   struct kmap *kmap;
> +
> +   if (page)
> +   return kmap_atomic(page);
> +   addr = __pfn_t_to_phys(pfn);
> +   rcu_read_lock();
> +   list_for_each_entry_rcu(kmap, &ranges, list)
> +   if (addr >= kmap->res->start && addr <= kmap->res->end)
> +   return kmap->base + addr - kmap->res->start;
> +
> +   /* only unlock in the error case */

Re: [Linux-nvdimm] [PATCH v2 08/10] x86: support kmap_atomic_pfn_t() for persistent memory

2015-05-06 Thread Dan Williams
On Wed, May 6, 2015 at 1:05 PM, Dan Williams <dan.j.willi...@intel.com> wrote:
 It would be unfortunate if the kmap infrastructure escaped its current
 32-bit/HIGHMEM bonds and leaked into 64-bit code.  Instead, if the user
 has enabled CONFIG_PMEM_IO we direct the kmap_atomic_pfn_t()
 implementation to scan a list of pre-mapped persistent memory address
 ranges inserted by the pmem driver.

 The __pfn_t to resource lookup is indeed inefficient walking of a linked list,
 but there are two mitigating factors:

 1/ The number of persistent memory ranges is bounded by the number of
    DIMMs which is on the order of 10s of DIMMs, not hundreds.

 2/ The lookup yields the entire range, if it becomes inefficient to do a
    kmap_atomic_pfn_t() a PAGE_SIZE at a time the caller can take
    advantage of the fact that the lookup can be amortized for all kmap
    operations it needs to perform in a given range.

 Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
 ---
  arch/Kconfig |3 +
  arch/x86/Kconfig |2 +
  arch/x86/kernel/Makefile |1
  arch/x86/kernel/kmap.c   |   95 
 ++
  drivers/block/pmem.c |6 +++
  include/linux/highmem.h  |   23 +++
  6 files changed, 130 insertions(+)
  create mode 100644 arch/x86/kernel/kmap.c

 diff --git a/arch/Kconfig b/arch/Kconfig
 index f7f800860c00..69d3a3fa21af 100644
 --- a/arch/Kconfig
 +++ b/arch/Kconfig
 @@ -206,6 +206,9 @@ config HAVE_DMA_CONTIGUOUS
  config HAVE_DMA_PFN
 bool

 +config HAVE_KMAP_PFN
 +   bool
 +
  config GENERIC_SMP_IDLE_THREAD
 bool

 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
 index 1fae5e842423..eddaea839500 100644
 --- a/arch/x86/Kconfig
 +++ b/arch/x86/Kconfig
 @@ -1434,7 +1434,9 @@ config X86_PMEM_LEGACY
   Say Y if unsure.

  config X86_PMEM_DMA
 +   depends on !HIGHMEM
 def_bool PMEM_IO
 +   select HAVE_KMAP_PFN
 select HAVE_DMA_PFN

  config HIGHPTE
 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
 index 9bcd0b56ca17..44c323342996 100644
 --- a/arch/x86/kernel/Makefile
 +++ b/arch/x86/kernel/Makefile
 @@ -96,6 +96,7 @@ obj-$(CONFIG_PARAVIRT)+= paravirt.o paravirt_patch_$(BITS).o
  obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
  obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
  obj-$(CONFIG_X86_PMEM_LEGACY)  += pmem.o
 +obj-$(CONFIG_X86_PMEM_DMA) += kmap.o

  obj-$(CONFIG_PCSPKR_PLATFORM)  += pcspeaker.o

 diff --git a/arch/x86/kernel/kmap.c b/arch/x86/kernel/kmap.c
 new file mode 100644
 index 000000000000..d597c475377b
 --- /dev/null
 +++ b/arch/x86/kernel/kmap.c
 @@ -0,0 +1,95 @@
 +/*
 + * Copyright(c) 2015 Intel Corporation. All rights reserved.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of version 2 of the GNU General Public License as
 + * published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful, but
 + * WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * General Public License for more details.
 + */
 +#include <linux/rcupdate.h>
 +#include <linux/rculist.h>
 +#include <linux/highmem.h>
 +#include <linux/device.h>
 +#include <linux/slab.h>
 +#include <linux/mm.h>
 +
 +static LIST_HEAD(ranges);
 +
 +struct kmap {
 +   struct list_head list;
 +   struct resource *res;
 +   struct device *dev;
 +   void *base;
 +};
 +
 +static void teardown_kmap(void *data)
 +{
 +   struct kmap *kmap = data;
 +
 +   dev_dbg(kmap->dev, "kmap unregister %pr\n", kmap->res);
 +   list_del_rcu(&kmap->list);
 +   synchronize_rcu();
 +   kfree(kmap);
 +}
 +
 +int devm_register_kmap_pfn_range(struct device *dev, struct resource *res,
 +   void *base)
 +{
 +   struct kmap *kmap = kzalloc(sizeof(*kmap), GFP_KERNEL);
 +   int rc;
 +
 +   if (!kmap)
 +   return -ENOMEM;
 +
 +   INIT_LIST_HEAD(&kmap->list);
 +   kmap->res = res;
 +   kmap->base = base;
 +   kmap->dev = dev;
 +   rc = devm_add_action(dev, teardown_kmap, kmap);
 +   if (rc) {
 +   kfree(kmap);
 +   return rc;
 +   }
 +   dev_dbg(kmap->dev, "kmap register %pr\n", kmap->res);
 +   list_add_rcu(&kmap->list, &ranges);
 +   return 0;
 +}
 +EXPORT_SYMBOL_GPL(devm_register_kmap_pfn_range);
 +
 +void *kmap_atomic_pfn_t(__pfn_t pfn)
 +{
 +   struct page *page = __pfn_t_to_page(pfn);
 +   resource_size_t addr;
 +   struct kmap *kmap;
 +
 +   if (page)
 +   return kmap_atomic(page);
 +   addr = __pfn_t_to_phys(pfn);
 +   rcu_read_lock();
 +   list_for_each_entry_rcu(kmap, &ranges, list)
 +   if (addr >= kmap->res->start && addr <= kmap->res->end)
 +   return kmap->base + addr - kmap->res->start;
 +
 +   /* only unlock in the error case */
 +