Re: [Linux-nvdimm] [PATCH v2 08/10] x86: support kmap_atomic_pfn_t() for persistent memory
On Wed, May 6, 2015 at 1:05 PM, Dan Williams wrote: > It would be unfortunate if the kmap infrastructure escaped its current > 32-bit/HIGHMEM bonds and leaked into 64-bit code. Instead, if the user > has enabled CONFIG_PMEM_IO we direct the kmap_atomic_pfn_t() > implementation to scan a list of pre-mapped persistent memory address > ranges inserted by the pmem driver. > > The __pfn_t to resource lookup is indeed inefficient walking of a linked list, > but there are two mitigating factors: > > 1/ The number of persistent memory ranges is bounded by the number of >DIMMs which is on the order of 10s of DIMMs, not hundreds. > > 2/ The lookup yields the entire range, if it becomes inefficient to do a >kmap_atomic_pfn_t() a PAGE_SIZE at a time the caller can take >advantage of the fact that the lookup can be amortized for all kmap >operations it needs to perform in a given range. > > Signed-off-by: Dan Williams > --- > arch/Kconfig |3 + > arch/x86/Kconfig |2 + > arch/x86/kernel/Makefile |1 > arch/x86/kernel/kmap.c | 95 > ++ > drivers/block/pmem.c |6 +++ > include/linux/highmem.h | 23 +++ > 6 files changed, 130 insertions(+) > create mode 100644 arch/x86/kernel/kmap.c > > diff --git a/arch/Kconfig b/arch/Kconfig > index f7f800860c00..69d3a3fa21af 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -206,6 +206,9 @@ config HAVE_DMA_CONTIGUOUS > config HAVE_DMA_PFN > bool > > +config HAVE_KMAP_PFN > + bool > + > config GENERIC_SMP_IDLE_THREAD > bool > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > index 1fae5e842423..eddaea839500 100644 > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -1434,7 +1434,9 @@ config X86_PMEM_LEGACY > Say Y if unsure. 
> > config X86_PMEM_DMA > + depends on !HIGHMEM > def_bool PMEM_IO > + select HAVE_KMAP_PFN > select HAVE_DMA_PFN > > config HIGHPTE > diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile > index 9bcd0b56ca17..44c323342996 100644 > --- a/arch/x86/kernel/Makefile > +++ b/arch/x86/kernel/Makefile > @@ -96,6 +96,7 @@ obj-$(CONFIG_PARAVIRT)+= paravirt.o > paravirt_patch_$(BITS).o > obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o > obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o > obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o > +obj-$(CONFIG_X86_PMEM_DMA) += kmap.o > > obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o > > diff --git a/arch/x86/kernel/kmap.c b/arch/x86/kernel/kmap.c > new file mode 100644 > index ..d597c475377b > --- /dev/null > +++ b/arch/x86/kernel/kmap.c > @@ -0,0 +1,95 @@ > +/* > + * Copyright(c) 2015 Intel Corporation. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of version 2 of the GNU General Public License as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, but > + * WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * General Public License for more details. 
> + */ > +#include <linux/rcupdate.h> > +#include <linux/rculist.h> > +#include <linux/highmem.h> > +#include <linux/device.h> > +#include <linux/slab.h> > +#include <linux/mm.h> > + > +static LIST_HEAD(ranges); > + > +struct kmap { > + struct list_head list; > + struct resource *res; > + struct device *dev; > + void *base; > +}; > + > +static void teardown_kmap(void *data) > +{ > + struct kmap *kmap = data; > + > + dev_dbg(kmap->dev, "kmap unregister %pr\n", kmap->res); > + list_del_rcu(&kmap->list); > + synchronize_rcu(); > + kfree(kmap); > +} > + > +int devm_register_kmap_pfn_range(struct device *dev, struct resource *res, > + void *base) > +{ > + struct kmap *kmap = kzalloc(sizeof(*kmap), GFP_KERNEL); > + int rc; > + > + if (!kmap) > + return -ENOMEM; > + > + INIT_LIST_HEAD(&kmap->list); > + kmap->res = res; > + kmap->base = base; > + kmap->dev = dev; > + rc = devm_add_action(dev, teardown_kmap, kmap); > + if (rc) { > + kfree(kmap); > + return rc; > + } > + dev_dbg(kmap->dev, "kmap register %pr\n", kmap->res); > + list_add_rcu(&kmap->list, &ranges); > + return 0; > +} > +EXPORT_SYMBOL_GPL(devm_register_kmap_pfn_range); > + > +void *kmap_atomic_pfn_t(__pfn_t pfn) > +{ > + struct page *page = __pfn_t_to_page(pfn); > + resource_size_t addr; > + struct kmap *kmap; > + > + if (page) > + return kmap_atomic(page); > + addr = __pfn_t_to_phys(pfn); > + rcu_read_lock(); > + list_for_each_entry_rcu(kmap, &ranges, list) > + if (addr >= kmap->res->start && addr <= kmap->res->end) > + return kmap->base + addr - kmap->res->start; > + > + /* only unlock in the error case */
Re: [Linux-nvdimm] [PATCH v2 08/10] x86: support kmap_atomic_pfn_t() for persistent memory
On Wed, May 6, 2015 at 1:05 PM, Dan Williams dan.j.willi...@intel.com wrote: It would be unfortunate if the kmap infrastructure escaped its current 32-bit/HIGHMEM bonds and leaked into 64-bit code. Instead, if the user has enabled CONFIG_PMEM_IO we direct the kmap_atomic_pfn_t() implementation to scan a list of pre-mapped persistent memory address ranges inserted by the pmem driver. The __pfn_t to resource lookup is indeed inefficient walking of a linked list, but there are two mitigating factors: 1/ The number of persistent memory ranges is bounded by the number of DIMMs which is on the order of 10s of DIMMs, not hundreds. 2/ The lookup yields the entire range, if it becomes inefficient to do a kmap_atomic_pfn_t() a PAGE_SIZE at a time the caller can take advantage of the fact that the lookup can be amortized for all kmap operations it needs to perform in a given range. Signed-off-by: Dan Williams dan.j.willi...@intel.com --- arch/Kconfig |3 + arch/x86/Kconfig |2 + arch/x86/kernel/Makefile |1 arch/x86/kernel/kmap.c | 95 ++ drivers/block/pmem.c |6 +++ include/linux/highmem.h | 23 +++ 6 files changed, 130 insertions(+) create mode 100644 arch/x86/kernel/kmap.c diff --git a/arch/Kconfig b/arch/Kconfig index f7f800860c00..69d3a3fa21af 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -206,6 +206,9 @@ config HAVE_DMA_CONTIGUOUS config HAVE_DMA_PFN bool +config HAVE_KMAP_PFN + bool + config GENERIC_SMP_IDLE_THREAD bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1fae5e842423..eddaea839500 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1434,7 +1434,9 @@ config X86_PMEM_LEGACY Say Y if unsure. 
config X86_PMEM_DMA + depends on !HIGHMEM def_bool PMEM_IO + select HAVE_KMAP_PFN select HAVE_DMA_PFN config HIGHPTE diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9bcd0b56ca17..44c323342996 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_PARAVIRT)+= paravirt.o paravirt_patch_$(BITS).o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o +obj-$(CONFIG_X86_PMEM_DMA) += kmap.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/kmap.c b/arch/x86/kernel/kmap.c new file mode 100644 index ..d597c475377b --- /dev/null +++ b/arch/x86/kernel/kmap.c @@ -0,0 +1,95 @@ +/* + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/rcupdate.h> +#include <linux/rculist.h> +#include <linux/highmem.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/mm.h> + +static LIST_HEAD(ranges); + +struct kmap { + struct list_head list; + struct resource *res; + struct device *dev; + void *base; +}; + +static void teardown_kmap(void *data) +{ + struct kmap *kmap = data; + + dev_dbg(kmap->dev, "kmap unregister %pr\n", kmap->res); + list_del_rcu(&kmap->list); + synchronize_rcu(); + kfree(kmap); +} + +int devm_register_kmap_pfn_range(struct device *dev, struct resource *res, + void *base) +{ + struct kmap *kmap = kzalloc(sizeof(*kmap), GFP_KERNEL); + int rc; + + if (!kmap) + return -ENOMEM; + + INIT_LIST_HEAD(&kmap->list); + kmap->res = res; + kmap->base = base; + kmap->dev = dev; + rc = devm_add_action(dev, teardown_kmap, kmap); + if (rc) { + kfree(kmap); + return rc; + } + dev_dbg(kmap->dev, "kmap register %pr\n", kmap->res); + list_add_rcu(&kmap->list, &ranges); + return 0; +} +EXPORT_SYMBOL_GPL(devm_register_kmap_pfn_range); + +void *kmap_atomic_pfn_t(__pfn_t pfn) +{ + struct page *page = __pfn_t_to_page(pfn); + resource_size_t addr; + struct kmap *kmap; + + if (page) + return kmap_atomic(page); + addr = __pfn_t_to_phys(pfn); + rcu_read_lock(); + list_for_each_entry_rcu(kmap, &ranges, list) + if (addr >= kmap->res->start && addr <= kmap->res->end) + return kmap->base + addr - kmap->res->start; + + /* only unlock in the error case */ +