Re: [PATCH v5] x86: fix kaslr and memmap collision
Add Kees to let him have a look at this too. On 01/05/17 at 05:21pm, Baoquan He wrote: > On 01/04/17 at 11:29am, Dave Jiang wrote: > > CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address. > > However it does not take into account the memmap= parameter passed in from > > the kernel cmdline. This results in the kernel sometimes being put in > > the middle of memmap. Teaching kaslr to not insert the kernel in > > memmap defined regions. We will support up to 4 memmap regions. Any > > additional regions will cause kaslr to disable. The mem_avoid set has > > been augmented to add up to 4 unusable regions of memmaps provided by the > > user to exclude those regions from the set of valid address range to insert > > the uncompressed kernel image. The nn@ss ranges will be skipped by the > > mem_avoid set since it indicates memory useable. > > > > Signed-off-by: Dave Jiang> > --- > > > > v2: > > Addressing comments from Ingo. > > - Handle entire list of memmaps > > v3: > > Fix 32bit build issue > > v4: > > Addressing comments from Baoquan > > - Not exclude nn@ss ranges > > v5: > > Addressing additional comments from Baoquan > > - Update commit header and various coding style changes > > > > diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h > > index e5612f3..59c2075 100644 > > --- a/arch/x86/boot/boot.h > > +++ b/arch/x86/boot/boot.h > > @@ -332,7 +332,10 @@ int strncmp(const char *cs, const char *ct, size_t > > count); > > size_t strnlen(const char *s, size_t maxlen); > > unsigned int atou(const char *s); > > unsigned long long simple_strtoull(const char *cp, char **endp, unsigned > > int base); > > +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int > > base); > > +long simple_strtol(const char *cp, char **endp, unsigned int base); > > size_t strlen(const char *s); > > +char *strchr(const char *s, int c); > > > > /* tty.c */ > > void puts(const char *); > > diff --git a/arch/x86/boot/compressed/kaslr.c > > 
b/arch/x86/boot/compressed/kaslr.c > > index a66854d..036b514 100644 > > --- a/arch/x86/boot/compressed/kaslr.c > > +++ b/arch/x86/boot/compressed/kaslr.c > > @@ -11,6 +11,7 @@ > > */ > > #include "misc.h" > > #include "error.h" > > +#include "../boot.h" > > > > #include > > #include > > @@ -56,11 +57,16 @@ struct mem_vector { > > unsigned long size; > > }; > > > > +/* only supporting at most 4 unusable memmap regions with kaslr */ > > +#define MAX_MEMMAP_REGIONS 4 > > + > > enum mem_avoid_index { > > MEM_AVOID_ZO_RANGE = 0, > > MEM_AVOID_INITRD, > > MEM_AVOID_CMDLINE, > > MEM_AVOID_BOOTPARAMS, > > + MEM_AVOID_MEMMAP_BEGIN, > > + MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1, > > MEM_AVOID_MAX, > > }; > > > > @@ -77,6 +83,121 @@ static bool mem_overlaps(struct mem_vector *one, struct > > mem_vector *two) > > return true; > > } > > > > +/** > > + * _memparse - parse a string with mem suffixes into a number > > + * @ptr: Where parse begins > > + * @retptr: (output) Optional pointer to next char after parse completes > > + * > > + * Parses a string into a number. The number stored at @ptr is > > + * potentially suffixed with K, M, G, T, P, E. 
> > + */ > > +static unsigned long long _memparse(const char *ptr, char **retptr) > > +{ > > + char *endptr; /* local pointer to end of parsed string */ > > + > > + unsigned long long ret = simple_strtoull(ptr, &endptr, 0); > > + > > + switch (*endptr) { > > + case 'E': > > + case 'e': > > + ret <<= 10; > > + case 'P': > > + case 'p': > > + ret <<= 10; > > + case 'T': > > + case 't': > > + ret <<= 10; > > + case 'G': > > + case 'g': > > + ret <<= 10; > > + case 'M': > > + case 'm': > > + ret <<= 10; > > + case 'K': > > + case 'k': > > + ret <<= 10; > > + endptr++; > > + default: > > + break; > > + } > > + > > + if (retptr) > > + *retptr = endptr; > > + > > + return ret; > > +} > > + > > +static int > > +parse_memmap(char *p, unsigned long long *start, unsigned long long *size) > > +{ > > + char *oldp; > > + > > + if (!p) > > + return -EINVAL; > > + > > + /* we don't care about this option here */ > > + if (!strncmp(p, "exactmap", 8)) > > + return -EINVAL; > > + > > + oldp = p; > > + *size = _memparse(p, &p); > > + if (p == oldp) > > + return -EINVAL; > > + > > + switch (*p) { > > + case '@': > > + /* skip this region, usable */ > > + *start = 0; > > + *size = 0; > > + return 0; > > + case '#': > > + case '$': > > + case '!': > > + *start = _memparse(p + 1, &p); > > + return 0; > > + } > > + > > + return -EINVAL; > > +} > > + > > +static int mem_avoid_memmap(void) > > +{ > > + char arg[128]; > > + int rc = 0; > > + > > + /* see if we have any memmap areas */ > > + if
Re: Enabling peer to peer device transactions for PCIe devices
On Thu, Jan 05, 2017 at 05:30:34PM -0700, Jason Gunthorpe wrote: > On Thu, Jan 05, 2017 at 06:23:52PM -0500, Jerome Glisse wrote: > > > > I still don't understand what you driving at - you've said in both > > > cases a user VMA exists. > > > > In the former case no, there is no VMA directly but if you want one than > > a device can provide one. But such VMA is useless as CPU access is not > > expected. > > I disagree it is useless, the VMA is going to be necessary to support > upcoming things like CAPI, you need it to support O_DIRECT from the > filesystem, DPDK, etc. This is why I am opposed to any model that is > not VMA based for setting up RDMA - that is shorted sighted and does > not seem to reflect where the industry is going. > > So focus on having VMA backed by actual physical memory that covers > your GPU objects and ask how do we wire up the '__user *' to the DMA > API in the best way so the DMA API still has enough information to > setup IOMMUs and whatnot. I am talking about 2 different thing. Existing hardware and API where you _do not_ have a vma and you do not need one. This is just existing stuff. Some close driver provide a functionality on top of this design. Question is do we want to do the same ? If yes and you insist on having a vma we could provide one but this is does not apply and is useless for where we are going with new hardware. With new hardware you just use malloc or mmap to allocate memory and then you use it directly with the device. Device driver can migrate any part of the process address space to device memory. In this scheme you have your usual VMAs but there is nothing special about them. Now when you try to do get_user_page() on any page that is inside the device it will fails because we do not allow any device memory to be pin. There is various reasons for that and they are not going away in any hw in the planing (so for next few years). Still we do want to support peer to peer mapping. 
Plan is to only do so with ODP capable hardware. Still we need to solve the IOMMU issue and it needs special handling inside the RDMA device. The way it works is that RDMA ask for a GPU page, GPU check if it has place inside its PCI bar to map this page for the device, this can fail. If it succeed then you need the IOMMU to let the RDMA device access the GPU PCI bar. So here we have 2 orthogonal problem. First one is how to make 2 drivers talks to each other to setup mapping to allow peer to peer and second is about IOMMU. > > What i was trying to get accross is that no matter what level you > > consider in the end you still need something at the DMA API level. > > And that the 2 different use case (device vma or regular vma) means > > 2 differents API for the device driver. > > I agree we need new stuff at the DMA API level, but I am opposed to > the idea we need two API paths that the *driver* has to figure out. > That is fundamentally not what I want as a driver developer. > > Give me a common API to convert '__user *' to a scatter list and pin > the pages. This needs to figure out your two cases. And Huge > Pages. And ZONE_DIRECT.. (a better get_user_pages) Pining is not gonna happen like i said it would hinder the GPU to the point it would become useless. > Give me an API to take the scatter list and DMA map it, handling all > the stuff associated with peer-peer. (a better dma_map_sg) > > Give me a notifier scheme to rework my scatter list when physical > pages need to change (mmu notifiers) > > Use the scatter list memory to convey needed information from the > first step to the second. > > Do not bother the driver with distinctions on what kind of memory is > behind that VMA. Don't ask me to use get_user_pages or > gpu_get_user_pages, do not ask me to use dma_map_sg or > dma_map_sg_peer_direct. The Driver Doesn't Need To Know. I understand you want it easy but there must be part that must be aware, at very least the ODP logic. 
Creating a peer to peer mapping is a multi step process and some of those step can fails. Fallback is always to migrate back to system memory as a default path that can not fail, except if we are out of memory. > IMHO this is why GPU direct is not mergable - it creates a crazy > parallel mini-mm subsystem inside RDMA and uses that to connect to a > GPU driver, everything is expected to have parallel paths for GPU > direct and normal MM. No good at all. Existing hardware and new hardware works differently. I am trying to explain the two different design needed for each one. You understandtably dislike the existing hardware that has more stringent requirement and can not be supported transparently and need dedicated communication with the two driver. New hardware that have a completely different API in userspace. We can decide to only support the latter and forget about the former. > > > So, how do you identify these GPU objects? How do you expect RDMA > > > convert them to scatter lists? How will ODP work? > > > > No ODP on
Re: [PATCH v2 0/4] Write protect DAX PMDs in *sync path
On Tue, 3 Jan 2017 17:13:49 -0700 Ross Zwislerwrote: > On Thu, Dec 22, 2016 at 02:18:52PM -0700, Ross Zwisler wrote: > > Currently dax_mapping_entry_mkclean() fails to clean and write protect the > > pmd_t of a DAX PMD entry during an *sync operation. This can result in > > data loss, as detailed in patch 4. > > > > You can find a working tree here: > > > > https://git.kernel.org/cgit/linux/kernel/git/zwisler/linux.git/log/?h=dax_pmd_clean_v2 > > > > This series applies cleanly to mmotm-2016-12-19-16-31. > > > > Changes since v1: > > - Included Dan's patch to kill DAX support for UML. > > - Instead of wrapping the DAX PMD code in dax_mapping_entry_mkclean() in > >an #ifdef, we now create a stub for pmdp_huge_clear_flush() for the case > >when CONFIG_TRANSPARENT_HUGEPAGE isn't defined. (Dan & Jan) > > > > Dan Williams (1): > > dax: kill uml support > > > > Ross Zwisler (3): > > dax: add stub for pmdp_huge_clear_flush() > > mm: add follow_pte_pmd() > > dax: wrprotect pmd_t in dax_mapping_entry_mkclean > > > > fs/Kconfig| 2 +- > > fs/dax.c | 49 > > ++- > > include/asm-generic/pgtable.h | 10 + > > include/linux/mm.h| 4 ++-- > > mm/memory.c | 41 > > 5 files changed, 79 insertions(+), 27 deletions(-) > > Well, 0-day found another architecture that doesn't define pmd_pfn() et al., > so we'll need some more fixes. (Thank you, 0-day, for the coverage!) > > I have to apologize, I didn't understand that Dan intended his "dax: kill uml > support" patch to land in v4.11. I thought he intended it as a cleanup to my > series, which really needs to land in v4.10. That's why I folded them > together into this v2, along with the wrapper suggested by Jan. > > Andrew, does it work for you to just keep v1 of this series, and eventually > send that to Linus for v4.10? > > https://lkml.org/lkml/2016/12/20/649 > > You've already pulled that one into -mm, and it does correctly solve the data > loss issue. 
> > That would let us deal with getting rid of the #ifdef, blacklisting > architectures and introducing the pmdp_huge_clear_flush() strub in a follow-on > series for v4.11. I have mm-add-follow_pte_pmd.patch and dax-wrprotect-pmd_t-in-dax_mapping_entry_mkclean.patch queued for 4.10. Please (re)send any additional patches, indicating for each one whether you believe it should also go into 4.10? ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: Enabling peer to peer device transactions for PCIe devices
On 2017-01-05 07:30 PM, Jason Gunthorpe wrote: but I am opposed to the idea we need two API paths that the *driver* has to figure out. That is fundamentally not what I want as a driver developer. Give me a common API to convert '__user *' to a scatter list and pin the pages. Completely agreed. IMHO there is no sense to duplicate the same logic everywhere as well as trying to find places where it is missing. Sincerely yours, Serguei Sagalovitch ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: Enabling peer to peer device transactions for PCIe devices
On Thu, Jan 05, 2017 at 06:23:52PM -0500, Jerome Glisse wrote: > > I still don't understand what you driving at - you've said in both > > cases a user VMA exists. > > In the former case no, there is no VMA directly but if you want one than > a device can provide one. But such VMA is useless as CPU access is not > expected. I disagree it is useless, the VMA is going to be necessary to support upcoming things like CAPI, you need it to support O_DIRECT from the filesystem, DPDK, etc. This is why I am opposed to any model that is not VMA based for setting up RDMA - that is shorted sighted and does not seem to reflect where the industry is going. So focus on having VMA backed by actual physical memory that covers your GPU objects and ask how do we wire up the '__user *' to the DMA API in the best way so the DMA API still has enough information to setup IOMMUs and whatnot. > What i was trying to get accross is that no matter what level you > consider in the end you still need something at the DMA API level. > And that the 2 different use case (device vma or regular vma) means > 2 differents API for the device driver. I agree we need new stuff at the DMA API level, but I am opposed to the idea we need two API paths that the *driver* has to figure out. That is fundamentally not what I want as a driver developer. Give me a common API to convert '__user *' to a scatter list and pin the pages. This needs to figure out your two cases. And Huge Pages. And ZONE_DIRECT.. (a better get_user_pages) Give me an API to take the scatter list and DMA map it, handling all the stuff associated with peer-peer. (a better dma_map_sg) Give me a notifier scheme to rework my scatter list when physical pages need to change (mmu notifiers) Use the scatter list memory to convey needed information from the first step to the second. Do not bother the driver with distinctions on what kind of memory is behind that VMA. 
Don't ask me to use get_user_pages or gpu_get_user_pages, do not ask me to use dma_map_sg or dma_map_sg_peer_direct. The Driver Doesn't Need To Know. IMHO this is why GPU direct is not mergable - it creates a crazy parallel mini-mm subsystem inside RDMA and uses that to connect to a GPU driver, everything is expected to have parallel paths for GPU direct and normal MM. No good at all. > > So, how do you identify these GPU objects? How do you expect RDMA > > convert them to scatter lists? How will ODP work? > > No ODP on those. If you want vma, the GPU device driver can provide You said you needed invalidate, that has to be done via ODP. Jason ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: Enabling peer to peer device transactions for PCIe devices
On Thu, Jan 05, 2017 at 03:42:15PM -0700, Jason Gunthorpe wrote: > On Thu, Jan 05, 2017 at 03:19:36PM -0500, Jerome Glisse wrote: > > > > Always having a VMA changes the discussion - the question is how to > > > create a VMA that reprensents IO device memory, and how do DMA > > > consumers extract the correct information from that VMA to pass to the > > > kernel DMA API so it can setup peer-peer DMA. > > > > Well my point is that it can't be. In HMM case inside a single VMA > > you > [..] > > > In the GPUDirect case the idea is that you have a specific device vma > > that you map for peer to peer. > > [..] > > I still don't understand what you driving at - you've said in both > cases a user VMA exists. In the former case no, there is no VMA directly but if you want one than a device can provide one. But such VMA is useless as CPU access is not expected. > > From my perspective in RDMA, all I want is a core kernel flow to > convert a '__user *' into a scatter list of DMA addresses, that works no > matter what is backing that VMA, be it HMM, a 'hidden' GPU object, or > struct page memory. > > A '__user *' pointer is the only way to setup a RDMA MR, and I see no > reason to have another API at this time. > > The details of how to translate to a scatter list are a MM subject, > and the MM folks need to get > > I just don't care if that routine works at a page level, or a whole > VMA level, or some combination of both, that is up to the MM team to > figure out :) And that's what i am trying to get accross. There is 2 cases here. What exist on today hardware. Thing like GPU direct, that works on VMA level. Versus where some new hardware is going were want to do thing on page level. Both require different API at different level. What i was trying to get accross is that no matter what level you consider in the end you still need something at the DMA API level. And that the 2 different use case (device vma or regular vma) means 2 differents API for the device driver. 
> > > a page level. Expectation here is that the GPU userspace expose a special > > API to allow RDMA to directly happen on GPU object allocated through > > GPU specific API (ie it is not regular memory and it is not accessible > > by CPU). > > So, how do you identify these GPU objects? How do you expect RDMA > convert them to scatter lists? How will ODP work? No ODP on those. If you want vma, the GPU device driver can provide one. GPU object are disjoint from regular memory (coming from some form of mmap). They are created through ioctl and in many case are never expose to the CPU. They only exist inside the GPU driver realm. None the less there is usecase where exchanging those object accross computer over a network make sense. I am not an end user here :) > > > We have MMU notifiers to handle this today in RDMA. Async RDMA MR > > > Invalidate like you see in the above out of tree patches is totally > > > crazy and shouldn't be in mainline. Use ODP capable RDMA hardware. > > > > Well there is still a large base of hardware that do not have such > > feature and some people would like to be able to keep using those. > > Hopefully someone will figure out how to do that without the crazy > async MR invalidation. Personnaly i don't care too much about this old hardware and thus i am fine without supporting them. The open source userspace is playing catchup and doing feature for old hardware probably does not make sense. Cheers, Jérôme ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: Enabling peer to peer device transactions for PCIe devices
On Thu, Jan 05, 2017 at 03:19:36PM -0500, Jerome Glisse wrote: > > Always having a VMA changes the discussion - the question is how to > > create a VMA that represents IO device memory, and how do DMA > > consumers extract the correct information from that VMA to pass to the > > kernel DMA API so it can setup peer-peer DMA. > > Well my point is that it can't be. In HMM case inside a single VMA > you [..] > In the GPUDirect case the idea is that you have a specific device vma > that you map for peer to peer. [..] I still don't understand what you driving at - you've said in both cases a user VMA exists. From my perspective in RDMA, all I want is a core kernel flow to convert a '__user *' into a scatter list of DMA addresses, that works no matter what is backing that VMA, be it HMM, a 'hidden' GPU object, or struct page memory. A '__user *' pointer is the only way to setup a RDMA MR, and I see no reason to have another API at this time. The details of how to translate to a scatter list are a MM subject, and the MM folks need to get I just don't care if that routine works at a page level, or a whole VMA level, or some combination of both, that is up to the MM team to figure out :) > a page level. Expectation here is that the GPU userspace expose a special > API to allow RDMA to directly happen on GPU object allocated through > GPU specific API (ie it is not regular memory and it is not accessible > by CPU). So, how do you identify these GPU objects? How do you expect RDMA convert them to scatter lists? How will ODP work? > > We have MMU notifiers to handle this today in RDMA. Async RDMA MR > > Invalidate like you see in the above out of tree patches is totally > > crazy and shouldn't be in mainline. Use ODP capable RDMA hardware. > > Well there is still a large base of hardware that do not have such > feature and some people would like to be able to keep using those. Hopefully someone will figure out how to do that without the crazy async MR invalidation. 
Jason ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: Enabling peer to peer device transactions for PCIe devices
On Thu, Jan 05, 2017 at 01:07:19PM -0700, Jason Gunthorpe wrote: > On Thu, Jan 05, 2017 at 02:54:24PM -0500, Jerome Glisse wrote: > > > Mellanox and NVidia support peer to peer with what they market a > > GPUDirect. It only works without IOMMU. It is probably not upstream : > > > > https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg21402.html > > > > I thought it was but it seems it require an out of tree driver to work. > > Right, it is out of tree and not under consideration for mainline. > > > Wether there is a vma or not isn't important to the issue anyway. If > > you want to enforce VMA rule for RDMA it is an RDMA specific discussion > > in which i don't want to be involve, it is not my turf :) > > Always having a VMA changes the discussion - the question is how to > create a VMA that reprensents IO device memory, and how do DMA > consumers extract the correct information from that VMA to pass to the > kernel DMA API so it can setup peer-peer DMA. Well my point is that it can't be. In HMM case inside a single VMA you can have one page inside GPU memory at address A but next page inside regular memory at A+4k. So handling this at the VMA level does not make sense. So in this case you would get the device from the struct page and you would query through common API to determine if you can do peer to peer. If not it would trigger migration back to regular memory. If yes then you still have to solve the IOMMU issue and hence the DMA API changes that were propose. In the GPUDirect case the idea is that you have a specific device vma that you map for peer to peer. Here thing can be at vma level and not at a page level. Expectation here is that the GPU userspace expose a special API to allow RDMA to directly happen on GPU object allocated through GPU specific API (ie it is not regular memory and it is not accessible by CPU). Both case are disjoint. Both case need to solve the IOMMU issue which seems to be best solve at the DMA API level. 
> > What matter is the back channel API between peer-to-peer device. Like > > the above patchset points out for GPU we need to be able to invalidate > > a mapping at any point in time. Pining is not something we want to > > live with. > > We have MMU notifiers to handle this today in RDMA. Async RDMA MR > Invalidate like you see in the above out of tree patches is totally > crazy and shouldn't be in mainline. Use ODP capable RDMA hardware. Well there is still a large base of hardware that do not have such feature and some people would like to be able to keep using those. I believe allowing direct access to GPU object that are otherwise hidden from regular kernel memory management is still meaningfull. Cheers, Jérôme ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: Enabling peer to peer device transactions for PCIe devices
On Thu, Jan 05, 2017 at 02:54:24PM -0500, Jerome Glisse wrote: > Mellanox and NVidia support peer to peer with what they market as > GPUDirect. It only works without IOMMU. It is probably not upstream : > > https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg21402.html > > I thought it was but it seems it require an out of tree driver to work. Right, it is out of tree and not under consideration for mainline. > Whether there is a vma or not isn't important to the issue anyway. If > you want to enforce VMA rule for RDMA it is an RDMA specific discussion > in which i don't want to be involve, it is not my turf :) Always having a VMA changes the discussion - the question is how to create a VMA that represents IO device memory, and how do DMA consumers extract the correct information from that VMA to pass to the kernel DMA API so it can setup peer-peer DMA. > What matter is the back channel API between peer-to-peer device. Like > the above patchset points out for GPU we need to be able to invalidate > a mapping at any point in time. Pinning is not something we want to > live with. We have MMU notifiers to handle this today in RDMA. Async RDMA MR Invalidate like you see in the above out of tree patches is totally crazy and shouldn't be in mainline. Use ODP capable RDMA hardware. Jason ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
[ndctl PATCH] test: document workaround for unit-test-module load priority
On some distributions it appears the recommended kernel build procedure can sometimes prefer the in-tree module over an out-of-tree module of the same name in the /lib/modules/<kernel-version>/extra directory. This can be worked around with an explicit depmod.d policy. Document how to setup this workaround. Reported-by: Dave Jiang Signed-off-by: Dan Williams --- README.md | 26 ++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index e9dec67a0ea2..38fc050210c3 100644 --- a/README.md +++ b/README.md @@ -56,3 +56,29 @@ git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git` 5. Now run `make check` in the ndctl source directory, or `ndctl test`, if ndctl was built with `--enable-test`. +Troubleshooting +=== + +The unit tests will validate that the environment is set up correctly +before they try to run. If the platform is misconfigured, i.e. the unit +test modules are not available, or the test versions of the modules are +superseded by the "in-tree/production" version of the modules `make +check` will skip tests and report a message like the following in +test/test-suite.log: +`SKIP: libndctl` +`==` +`test/init: nfit_test_init: nfit.ko: appears to be production version: /lib/modules/4.8.8-200.fc24.x86_64/kernel/drivers/acpi/nfit/nfit.ko.xz` +`__ndctl_test_skip: explicit skip test_libndctl:2684` +`nfit_test unavailable skipping tests` + +If the unit test modules are indeed available in the modules 'extra' +directory the default depmod policy can be overridden by adding a file +to /etc/depmod.d with the following contents: +`override nfit * extra` +`override dax * extra` +`override dax_pmem * extra` +`override libnvdimm * extra` +`override nd_blk * extra` +`override nd_btt * extra` +`override nd_e820 * extra` +`override nd_pmem * extra` ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: Enabling peer to peer device transactions for PCIe devices
On Thu, Jan 05, 2017 at 01:39:29PM -0500, Jerome Glisse wrote: > 1) peer-to-peer because of userspace specific API like NVidia GPU > direct (AMD is pushing its own similar API i just can't remember > marketing name). This does not happen through a vma, this happens > through specific device driver call going through device specific > ioctl on both side (GPU and RDMA). So both kernel driver are aware > of each others. Today you can only do user-initiated RDMA operations in conjunction with a VMA. We'd need a really big and strong reason to create an entirely new non-VMA based memory handle scheme for RDMA. So my inclination is to just completely push back on this idea. You need a VMA to do RDMA. GPUs need to create VMAs for the memory they want to RDMA from, even if the VMA handle just causes SIGBUS for any CPU access. Jason ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: [PATCH v5 1/2] mm, dax: make pmd_fault() and friends to be the same as fault()
On 12/19/2016 11:27 AM, Dave Jiang wrote: > Instead of passing in multiple parameters in the pmd_fault() handler, > a vmf can be passed in just like a fault() handler. This will simplify > code and remove the need for the actual pmd fault handlers to allocate a > vmf. Related functions are also modified to do the same. > > Signed-off-by: Dave Jiang> Reviewed-by: Ross Zwisler > Reviewed-by: Jan Kara Andrew, I haven't seen any more comments on these 2 patches. Will you pick them up for the mm tree? Thanks! > --- > drivers/dax/dax.c | 16 +++- > fs/dax.c | 42 > ++--- > fs/ext4/file.c|9 - > fs/xfs/xfs_file.c | 10 -- > include/linux/dax.h |7 +++ > include/linux/mm.h|3 +-- > include/trace/events/fs_dax.h | 15 +++ > mm/memory.c |6 ++ > 8 files changed, 46 insertions(+), 62 deletions(-) > > diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c > index c753a4c..947e49a 100644 > --- a/drivers/dax/dax.c > +++ b/drivers/dax/dax.c > @@ -379,10 +379,9 @@ static int dax_dev_fault(struct vm_area_struct *vma, > struct vm_fault *vmf) > } > > static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, > - struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, > - unsigned int flags) > + struct vm_area_struct *vma, struct vm_fault *vmf) > { > - unsigned long pmd_addr = addr & PMD_MASK; > + unsigned long pmd_addr = vmf->address & PMD_MASK; > struct device *dev = _dev->dev; > struct dax_region *dax_region; > phys_addr_t phys; > @@ -414,23 +413,22 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, > > pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); > > - return vmf_insert_pfn_pmd(vma, addr, pmd, pfn, > - flags & FAULT_FLAG_WRITE); > + return vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, > + vmf->flags & FAULT_FLAG_WRITE); > } > > -static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, > - pmd_t *pmd, unsigned int flags) > +static int dax_dev_pmd_fault(struct vm_area_struct *vma, struct vm_fault > *vmf) > { > int rc; > struct file *filp = vma->vm_file; 
> struct dax_dev *dax_dev = filp->private_data; > > dev_dbg(_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, > - current->comm, (flags & FAULT_FLAG_WRITE) > + current->comm, (vmf->flags & FAULT_FLAG_WRITE) > ? "write" : "read", vma->vm_start, vma->vm_end); > > rcu_read_lock(); > - rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags); > + rc = __dax_dev_pmd_fault(dax_dev, vma, vmf); > rcu_read_unlock(); > > return rc; > diff --git a/fs/dax.c b/fs/dax.c > index d3fe880..446e861 100644 > --- a/fs/dax.c > +++ b/fs/dax.c > @@ -1310,18 +1310,17 @@ static int dax_pmd_load_hole(struct vm_area_struct > *vma, pmd_t *pmd, > return VM_FAULT_FALLBACK; > } > > -int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, > - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops) > +int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf, > + struct iomap_ops *ops) > { > struct address_space *mapping = vma->vm_file->f_mapping; > - unsigned long pmd_addr = address & PMD_MASK; > - bool write = flags & FAULT_FLAG_WRITE; > + unsigned long pmd_addr = vmf->address & PMD_MASK; > + bool write = vmf->flags & FAULT_FLAG_WRITE; > unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; > struct inode *inode = mapping->host; > int result = VM_FAULT_FALLBACK; > struct iomap iomap = { 0 }; > - pgoff_t max_pgoff, pgoff; > - struct vm_fault vmf; > + pgoff_t max_pgoff; > void *entry; > loff_t pos; > int error; > @@ -1331,10 +1330,10 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, > unsigned long address, >* supposed to hold locks serializing us with truncate / punch hole so >* this is a reliable test. 
>*/ > - pgoff = linear_page_index(vma, pmd_addr); > + vmf->pgoff = linear_page_index(vma, pmd_addr); > max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; > > - trace_dax_pmd_fault(inode, vma, address, flags, pgoff, max_pgoff, 0); > + trace_dax_pmd_fault(inode, vma, vmf, max_pgoff, 0); > > /* Fall back to PTEs if we're going to COW */ > if (write && !(vma->vm_flags & VM_SHARED)) > @@ -1346,13 +1345,13 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, > unsigned long address, > if ((pmd_addr + PMD_SIZE) > vma->vm_end) > goto fallback; > > - if (pgoff > max_pgoff) { > + if
Re: Enabling peer to peer device transactions for PCIe devices
Sorry to revive this thread but it fell through my filters and I missed it. I have been going through it and I think the discussion has been hindered by the fact that distinct problems were merged while they should be addressed separately. First, for peer-to-peer we need to be clear on how this happens. Two cases here : 1) peer-to-peer because of a userspace-specific API like NVidia GPU Direct (AMD is pushing its own similar API, I just can't remember the marketing name). This does not happen through a vma, this happens through specific device driver calls going through device-specific ioctls on both sides (GPU and RDMA). So both kernel drivers are aware of each other. 2) peer-to-peer because RDMA/device is trying to access a regular vma (ie non-special, either private anonymous or shared memory or mmap of a regular file, not a device file). For 1) there is no need to over-complicate things. Device drivers must have a back-channel between them and must be able to invalidate their respective mappings (ie the GPU must be able to ask the RDMA device to kill/stop its MR). So the remaining issue for 1) is how to enable effective peer-to-peer mapping given that it might not work reliably on all platforms. Here Alex was listing existing proposals: A P2P DMA DMA-API/PCI map_peer_resource support for peer-to-peer http://www.spinics.net/lists/linux-pci/msg44560.html B ZONE_DEVICE Direct I/O and DMA for persistent memory https://lwn.net/Articles/672457/ C DMA-BUF RDMA subsystem DMA-BUF support http://www.spinics.net/lists/linux-rdma/msg38748.html D iopmem iopmem : A block device for PCIe memory https://lwn.net/Articles/703895/ E HMM (not interesting for case 1) F Something new Of the above, D is ill-suited for GPU as we do not want to pin GPU memory, and D is designed around long-lived objects that do not move. Also I do not think that exposing a device PCIe bar through a new /dev/somefilename is a good idea for GPU. So I think this should be discarded. HMM should be discarded in respect of case 1 too. 
It is useful for case 2. I don't think dma-buf is the right path either. So I think there are only A and B that make sense. Now for use case 1 I think A is the best solution. No need to have struct page, and it requires explicit knowledge in the device driver that it is mapping another device's memory, which is a given in use case 1. If we look at case 2 the situation is a bit more complex. Here RDMA is just trying to access a regular VMA, but it might happen that some memory inside that VMA resides inside device memory. When that happens we would like to avoid moving that memory back to system memory, assuming that a peer mapping is doable. Use case 2 assumes that the GPU is either on a platform with CAPI or CCTX (or something similar), in which case it is easy as device memory will have struct page and is always accessible by the CPU, and device-to-device access is transparent (AFAICT). So we are left with platforms that do not have proper support for device memory (ie the CPU can not access it the same as DDR, or has limited access). This applies to x86 for the foreseeable future. This is the problem HMM addresses, allowing device memory to be used transparently inside a process even if direct CPU access is not permitted. I plan to support peer-to-peer with HMM because it is an important use case. The idea is to have the device driver fault against ZONE_DEVICE pages and communicate through a common API to establish mappings. HMM will only handle keeping track of device-to-device mappings and allow invalidating such mappings at any time to allow memory to be migrated. I do not intend to solve the IOMMU side of the problem, or even the PCI hierarchy issue where you can't peer-to-peer between devices across some PCI bridges. I believe this is an orthogonal problem and that it is best solved inside the DMA API, ie with solution A. I do not think we should try to solve all the problems with a common solution. They are too disparate in capabilities (what the hardware can and can't do). 
From my point of view there are a few takeaways: - a device should only access regular vmas - a device should never try to access a vma that points to another device (mmap of any file in /dev) - peer-to-peer access through a dedicated userspace API must involve a dedicated API between the kernel drivers taking part in the peer-to-peer access - peer-to-peer on regular vmas must involve a common API for drivers to interact so no driver can block the other So I think the DMA-API proposal is the one to pursue, and other problems relating to handling GPU memory and how to use it are a different kind of problem. One with either a hardware solution (CAPI, CCTX, ...) or a software solution (HMM so far). I don't think we should conflate the 2 problems into one. Anyway I think this should be something worth discussing face to face with the interested parties to flesh out a solution (can be at LSF/MM or in another forum). Cheers, Jérôme
[ndctl PATCH] test: validate nfit_test modules before running tests
System crashes and other failures can occur if an environment attempts to run the unit tests with the "production" / in-tree versions of the nfit and libnvdimm modules. Ensure that all required test modules are supplied by out-of-tree versions. Granted that this detection mechanism will not differentiate the proper tools/testing/nvdimm/ out-of-tree modules with something like a "driver update" version of the modules, but it should catch the common case of a platform being misconfigured for unit-test execution. Reported-by: Dave JiangSigned-off-by: Dan Williams --- test/Makefile.am | 25 ++ test/core.c | 76 ++ 2 files changed, 90 insertions(+), 11 deletions(-) diff --git a/test/Makefile.am b/test/Makefile.am index 592308ee5f07..524fafa4a28a 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -40,30 +40,33 @@ LIBNDCTL_LIB =\ ../ndctl/lib/libndctl.la \ ../daxctl/lib/libdaxctl.la -libndctl_SOURCES = libndctl.c core.c +testcore =\ + core.c \ + ../util/log.c \ + ../util/sysfs.c + +libndctl_SOURCES = libndctl.c $(testcore) libndctl_LDADD = $(LIBNDCTL_LIB) $(UUID_LIBS) $(KMOD_LIBS) dsm_fail_SOURCES =\ dsm-fail.c \ - core.c \ - ../util/log.c \ - ../util/sysfs.c + $(testcore) dsm_fail_LDADD = $(LIBNDCTL_LIB) $(KMOD_LIBS) -blk_ns_SOURCES = blk_namespaces.c core.c +blk_ns_SOURCES = blk_namespaces.c $(testcore) blk_ns_LDADD = $(LIBNDCTL_LIB) $(KMOD_LIBS) -pmem_ns_SOURCES = pmem_namespaces.c core.c +pmem_ns_SOURCES = pmem_namespaces.c $(testcore) pmem_ns_LDADD = $(LIBNDCTL_LIB) $(KMOD_LIBS) -dpa_alloc_SOURCES = dpa-alloc.c core.c +dpa_alloc_SOURCES = dpa-alloc.c $(testcore) dpa_alloc_LDADD = $(LIBNDCTL_LIB) $(UUID_LIBS) $(KMOD_LIBS) -parent_uuid_SOURCES = parent-uuid.c core.c +parent_uuid_SOURCES = parent-uuid.c $(testcore) parent_uuid_LDADD = $(LIBNDCTL_LIB) $(UUID_LIBS) $(KMOD_LIBS) -dax_dev_SOURCES = dax-dev.c core.c +dax_dev_SOURCES = dax-dev.c $(testcore) dax_dev_LDADD = $(LIBNDCTL_LIB) $(KMOD_LIBS) dax_pmd_SOURCES = dax-pmd.c @@ -73,7 +76,7 @@ device_dax_SOURCES = \ 
device-dax.c \ dax-dev.c \ dax-pmd.c \ - core.c \ + $(testcore) \ ../ndctl/builtin-xaction-namespace.c \ ../util/json.c device_dax_LDADD = \ @@ -84,7 +87,7 @@ device_dax_LDADD = \ multi_pmem_SOURCES = \ multi-pmem.c \ - core.c \ + $(testcore) \ ../ndctl/builtin-xaction-namespace.c \ ../util/json.c multi_pmem_LDADD = \ diff --git a/test/core.c b/test/core.c index 3119fd826a94..d1c38902feb4 100644 --- a/test/core.c +++ b/test/core.c @@ -6,6 +6,10 @@ #include #include +#include +#include +#include + #define KVER_STRLEN 20 struct ndctl_test { @@ -103,12 +107,84 @@ int nfit_test_init(struct kmod_ctx **ctx, struct kmod_module **mod, int log_level) { int rc; + unsigned int i; + struct log_ctx log_ctx; + const char *list[] = { + "nfit", + "dax", + "dax_pmem", + "libnvdimm", + "nd_blk", + "nd_btt", + "nd_e820", + "nd_pmem", + }; + + log_init(_ctx, "test/init", "NDCTL_TEST"); + log_ctx.log_priority = log_level; *ctx = kmod_new(NULL, NULL); if (!*ctx) return -ENXIO; kmod_set_log_priority(*ctx, log_level); + /* +* Check that all nfit, libnvdimm, and device-dax modules are +* the mocked versions. If they are loaded, check that they have +* the "out-of-tree" kernel taint, otherwise check that they +* come from the "/lib/modules//extra" directory. +*/ + for (i = 0; i < ARRAY_SIZE(list); i++) { + char attr[SYSFS_ATTR_SIZE]; + const char *name = list[i]; + const char *path; + char buf[100]; + int state; + + rc = kmod_module_new_from_name(*ctx, name, mod); + if (rc) { + log_err(_ctx, "%s.ko: missing\n", name); + break; + } + + path = kmod_module_get_path(*mod); + if (!path) { + log_err(_ctx, "%s.ko: failed to get path\n", name); + break; + } + + if (!strstr(path, "/extra/")) { + log_err(_ctx, "%s.ko: appears to be production version: %s\n", + name, path); + break; + } + + state = kmod_module_get_initstate(*mod); + if (state == KMOD_MODULE_LIVE) { + sprintf(buf, "/sys/module/%s/taint",
Re: [PATCH] pmem: return EIO on read_pmem() failure
Stefan Hajnocziwrites: > The read_pmem() function uses memcpy_mcsafe() on x86 where an EFAULT > error code indicates a failed read. Block I/O should use EIO to > indicate failure. Other pmem code paths (like bad blocks) already use > EIO so let's be consistent. I agree that it's the right thing to return, though right now it probably doesn't matter. As Dan said, all paths leading back to userspace interpret any non-zero value as EIO. Reviewed-by: Jeff Moyer > Cc: Dan Williams > Signed-off-by: Stefan Hajnoczi > --- > drivers/nvdimm/pmem.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c > index 7282d74..5b536be 100644 > --- a/drivers/nvdimm/pmem.c > +++ b/drivers/nvdimm/pmem.c > @@ -90,7 +90,9 @@ static int read_pmem(struct page *page, unsigned int off, > > rc = memcpy_from_pmem(mem + off, pmem_addr, len); > kunmap_atomic(mem); > - return rc; > + if (rc) > + return -EIO; > + return 0; > } > > static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: [ndctl PATCH] ndctl: support machines without numa
Dan Williamswrites: > From: Diego Dompe > > Currently when using ndctl in machines that do not have numa, the ndctl > command fails because it fails to find the numa_node attribute in > systems. s/machines that do not have numa/kernels compiled with CONFIG_NUMA=n/ I don't know that this was the right decision for the kernel, but that ship's sailed. > Just assume the default, zero, in these cases rather than fail. Looks fine to me. Reviewed-by: Jeff Moyer > > Link: https://github.com/pmem/ndctl/pull/9 > [djbw: minor style fixups] > Signed-off-by: Dan Williams > --- > > Diego, if this reworked version looks good to you I'll add your > "Signed-off-by: Diego Dompe " to the tags. Let me know. > Thanks for the fix! > > ndctl/lib/libndctl.c |5 ++--- > 1 file changed, 2 insertions(+), 3 deletions(-) > > diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c > index ea3111eb6c93..8240235dff5c 100644 > --- a/ndctl/lib/libndctl.c > +++ b/ndctl/lib/libndctl.c > @@ -2780,9 +2780,8 @@ static void *add_namespace(void *parent, int id, const > char *ndns_base) > ndns->raw_mode = strtoul(buf, NULL, 0); > > sprintf(path, "%s/numa_node", ndns_base); > - if (sysfs_read_attr(ctx, path, buf) < 0) > - goto err_read; > - ndns->numa_node = strtol(buf, NULL, 0); > + if (sysfs_read_attr(ctx, path, buf) == 0) > + ndns->numa_node = strtol(buf, NULL, 0); > > switch (ndns->type) { > case ND_DEVICE_NAMESPACE_BLK: > > ___ > Linux-nvdimm mailing list > Linux-nvdimm@lists.01.org > https://lists.01.org/mailman/listinfo/linux-nvdimm ___ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
Re: [PATCH v5] x86: fix kaslr and memmap collision
On 01/04/17 at 11:29am, Dave Jiang wrote: > CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address. > However it does not take into account the memmap= parameter passed in from > the kernel cmdline. This results in the kernel sometimes being put in > the middle of memmap. Teaching kaslr to not insert the kernel in > memmap defined regions. We will support up to 4 memmap regions. Any > additional regions will cause kaslr to disable. The mem_avoid set has > been augmented to add up to 4 unusable regions of memmaps provided by the > user to exclude those regions from the set of valid address range to insert > the uncompressed kernel image. The nn@ss ranges will be skipped by the > mem_avoid set since it indicates memory useable. > > Signed-off-by: Dave Jiang> --- > > v2: > Addressing comments from Ingo. > - Handle entire list of memmaps > v3: > Fix 32bit build issue > v4: > Addressing comments from Baoquan > - Not exclude nn@ss ranges > v5: > Addressing additional comments from Baoquan > - Update commit header and various coding style changes > > diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h > index e5612f3..59c2075 100644 > --- a/arch/x86/boot/boot.h > +++ b/arch/x86/boot/boot.h > @@ -332,7 +332,10 @@ int strncmp(const char *cs, const char *ct, size_t > count); > size_t strnlen(const char *s, size_t maxlen); > unsigned int atou(const char *s); > unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int > base); > +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base); > +long simple_strtol(const char *cp, char **endp, unsigned int base); > size_t strlen(const char *s); > +char *strchr(const char *s, int c); > > /* tty.c */ > void puts(const char *); > diff --git a/arch/x86/boot/compressed/kaslr.c > b/arch/x86/boot/compressed/kaslr.c > index a66854d..036b514 100644 > --- a/arch/x86/boot/compressed/kaslr.c > +++ b/arch/x86/boot/compressed/kaslr.c > @@ -11,6 +11,7 @@ > */ > #include "misc.h" > #include 
"error.h" > +#include "../boot.h" > > #include > #include > @@ -56,11 +57,16 @@ struct mem_vector { > unsigned long size; > }; > > +/* only supporting at most 4 unusable memmap regions with kaslr */ > +#define MAX_MEMMAP_REGIONS 4 > + > enum mem_avoid_index { > MEM_AVOID_ZO_RANGE = 0, > MEM_AVOID_INITRD, > MEM_AVOID_CMDLINE, > MEM_AVOID_BOOTPARAMS, > + MEM_AVOID_MEMMAP_BEGIN, > + MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1, > MEM_AVOID_MAX, > }; > > @@ -77,6 +83,121 @@ static bool mem_overlaps(struct mem_vector *one, struct > mem_vector *two) > return true; > } > > +/** > + * _memparse - parse a string with mem suffixes into a number > + * @ptr: Where parse begins > + * @retptr: (output) Optional pointer to next char after parse completes > + * > + * Parses a string into a number. The number stored at @ptr is > + * potentially suffixed with K, M, G, T, P, E. > + */ > +static unsigned long long _memparse(const char *ptr, char **retptr) > +{ > + char *endptr; /* local pointer to end of parsed string */ > + > + unsigned long long ret = simple_strtoull(ptr, , 0); > + > + switch (*endptr) { > + case 'E': > + case 'e': > + ret <<= 10; > + case 'P': > + case 'p': > + ret <<= 10; > + case 'T': > + case 't': > + ret <<= 10; > + case 'G': > + case 'g': > + ret <<= 10; > + case 'M': > + case 'm': > + ret <<= 10; > + case 'K': > + case 'k': > + ret <<= 10; > + endptr++; > + default: > + break; > + } > + > + if (retptr) > + *retptr = endptr; > + > + return ret; > +} > + > +static int > +parse_memmap(char *p, unsigned long long *start, unsigned long long *size) > +{ > + char *oldp; > + > + if (!p) > + return -EINVAL; > + > + /* we don't care about this option here */ > + if (!strncmp(p, "exactmap", 8)) > + return -EINVAL; > + > + oldp = p; > + *size = _memparse(p, ); > + if (p == oldp) > + return -EINVAL; > + > + switch (*p) { > + case '@': > + /* skip this region, usable */ > + *start = 0; > + *size = 0; > + return 0; > + case '#': > + case '$': > + 
case '!': > + *start = _memparse(p + 1, ); > + return 0; > + } > + > + return -EINVAL; > +} > + > +static int mem_avoid_memmap(void) > +{ > + char arg[128]; > + int rc = 0; > + > + /* see if we have any memmap areas */ > + if (cmdline_find_option("memmap", arg, sizeof(arg)) > 0) { > + int i = 0; > + char *str = arg; > + > + while (str && (i < MAX_MEMMAP_REGIONS)) { > + unsigned long long start, size; > + char *k = strchr(str, ','); > +