On Tue, Nov 11, 2014 at 10:09:25AM +0000, Anatoly Burakov wrote:
> A multi-process DPDK application must mmap hugepages and PCI resources
> into the same virtual address space. By default, the virtual addresses
> are chosen automatically by the primary process when it calls mmap.
> But sometimes the chosen virtual addresses aren't usable in a secondary
> process - for example, when the secondary process is linked with more
> libraries than the primary process, and one of those libraries occupies
> the same address space that the primary process has requested for PCI
> mappings.
> 
> This patch makes EAL try to map PCI BARs right after the hugepages
> (instead of at a location chosen by mmap) in virtual memory, so that PCI
> BARs have less chance of ending up in random places in virtual memory.
> 
> Signed-off-by: Liang Xu <liang.xu at cinfotech.cn>
> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>

Acked-by: Bruce Richardson <bruce.richardson at intel.com>

> ---
>  lib/librte_eal/linuxapp/eal/eal_pci.c              | 30 ++++++++++++++++------
>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c          | 13 ++++++++--
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 19 +++++++++++---
>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h |  6 +++++
>  4 files changed, 55 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index 5fe3961..79fbbb8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -97,6 +97,25 @@ error:
>       return -1;
>  }
>  
> +void *
> +pci_find_max_end_va(void)
> +{
> +     const struct rte_memseg *seg = rte_eal_get_physmem_layout();
> +     const struct rte_memseg *last = seg;
> +     unsigned i = 0;
> +
> +     for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
> +             if (seg->addr == NULL)
> +                     break;
> +
> +             if (seg->addr > last->addr)
> +                     last = seg;
> +
> +     }
> +     return RTE_PTR_ADD(last->addr, last->len);
> +}
> +
> +
>  /* map a particular resource from a file */
>  void *
>  pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
> @@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
>       /* Map the PCI memory resource of device */
>       mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
>                       MAP_SHARED, fd, offset);
> -     if (mapaddr == MAP_FAILED ||
> -                     (requested_addr != NULL && mapaddr != requested_addr)) {
> +     if (mapaddr == MAP_FAILED) {
>               RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s 
> (%p)\n",
>                       __func__, fd, requested_addr,
>                       (unsigned long)size, (unsigned long)offset,
>                       strerror(errno), mapaddr);
> -             goto fail;
> +     } else {
> +             RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
>       }
>  
> -     RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
> -
>       return mapaddr;
> -
> -fail:
> -     return NULL;
>  }
>  
>  /* parse the "resource" sysfs file */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> index 7e62266..e53f06b 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> @@ -35,6 +35,7 @@
>  #include <fcntl.h>
>  #include <dirent.h>
>  #include <sys/stat.h>
> +#include <sys/mman.h>
>  
>  #include <rte_log.h>
>  #include <rte_pci.h>
> @@ -48,6 +49,8 @@
>  
>  static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
>  
> +void *pci_map_addr = NULL;
> +
>  
>  #define OFF_MAX              ((uint64_t)(off_t)-1)
>  static int
> @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>                       if (maps[j].addr != NULL)
>                               fail = 1;
>                       else {
> -                             mapaddr = pci_map_resource(NULL, fd, 
> (off_t)offset,
> +                             /* try mapping somewhere close to the end of 
> hugepages */
> +                             if (pci_map_addr == NULL)
> +                                     pci_map_addr = pci_find_max_end_va();
> +
> +                             mapaddr = pci_map_resource(pci_map_addr, fd, 
> (off_t)offset,
>                                               (size_t)maps[j].size);
> -                             if (mapaddr == NULL)
> +                             if (mapaddr == MAP_FAILED)
>                                       fail = 1;
> +
> +                             pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) 
> maps[j].size);
>                       }
>  
>                       if (fail) {
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index c776ddc..c1246e8 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -37,6 +37,7 @@
>  #include <sys/eventfd.h>
>  #include <sys/socket.h>
>  #include <sys/ioctl.h>
> +#include <sys/mman.h>
>  
>  #include <rte_log.h>
>  #include <rte_pci.h>
> @@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
>               if (i == msix_bar)
>                       continue;
>  
> -             bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, 
> reg.offset,
> -                             reg.size);
> +             if (internal_config.process_type == RTE_PROC_PRIMARY) {
> +                     /* try mapping somewhere close to the end of hugepages 
> */
> +                     if (pci_map_addr == NULL)
> +                             pci_map_addr = pci_find_max_end_va();
> +
> +                     bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, 
> reg.offset,
> +                                     reg.size);
> +                     pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
> +             } else {
> +                     bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, 
> reg.offset,
> +                                     reg.size);
> +             }
>  
> -             if (bar_addr == NULL) {
> +             if (bar_addr == MAP_FAILED ||
> +                             (internal_config.process_type == 
> RTE_PROC_SECONDARY &&
> +                                             bar_addr != maps[i].addr)) {
>                       RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n", 
> pci_addr, i,
>                                       strerror(errno));
>                       close(vfio_dev_fd);
> diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> index d758bee..1070eb8 100644
> --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> @@ -59,6 +59,12 @@ struct mapped_pci_resource {
>  TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
>  extern struct mapped_pci_res_list *pci_res_list;
>  
> +/*
> + * Helper function to map PCI resources right after hugepages in virtual 
> memory
> + */
> +extern void *pci_map_addr;
> +void *pci_find_max_end_va(void);
> +
>  void *pci_map_resource(void *requested_addr, int fd, off_t offset,
>               size_t size);
>  
> -- 
> 1.8.1.4
> 

Reply via email to