On Wed, Nov 17, 2021 at 09:49:23PM +0000, [email protected] wrote:
> From: Elena Agostini <[email protected]>
> 
> This patch introduces GPU memory in testpmd through the gpudev library.
> Testpmd can be used for network benchmarks when using GPU memory
> instead of regular CPU memory to send and receive packets.
> This option is currently limited to iofwd engine to ensure
> no workload is applied on packets not accessible from the CPU.
> 
> The options chose is --mbuf-size so buffer split feature across
> different mempools can be enabled.
> 
> Signed-off-by: Elena Agostini <[email protected]>

Ignoring for now the discussion about whether this patch should be accepted
or not (and I would share many of the concerns about it going into testpmd) -
a few comments on the code changes themselves below.

/Bruce

> ---
>  app/test-pmd/cmdline.c    |  36 +++++++-
>  app/test-pmd/config.c     |   4 +-
>  app/test-pmd/icmpecho.c   |   2 +-
>  app/test-pmd/meson.build  |   3 +
>  app/test-pmd/parameters.c |  15 +++-
>  app/test-pmd/testpmd.c    | 182 +++++++++++++++++++++++++++++++++++---
>  app/test-pmd/testpmd.h    |  18 +++-
>  7 files changed, 240 insertions(+), 20 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index 3faa37db6d..400aa07da8 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -3619,6 +3619,7 @@ parse_item_list(const char *str, const char *item_name, 
> unsigned int max_items,
>       unsigned int j;
>       int value_ok;
>       char c;
> +     int gpu_mbuf = 0;
>  
>       /*
>        * First parse all items in the list and store their value.
> @@ -3628,11 +3629,29 @@ parse_item_list(const char *str, const char 
> *item_name, unsigned int max_items,
>       value_ok = 0;
>       for (i = 0; i < strnlen(str, STR_TOKEN_SIZE); i++) {
>               c = str[i];
> +
Whitespace change unneeded for this patch, drop from diff.

>               if ((c >= '0') && (c <= '9')) {
>                       value = (unsigned int) (value * 10 + (c - '0'));
>                       value_ok = 1;
>                       continue;
>               }
> +             if (c == 'g') {
> +                     /*
> +                      * When this flag is set, mbufs for this segment
> +                      * will be created on GPU memory.
> +                      */
> +                     if (i < strnlen(str, STR_TOKEN_SIZE) - 1 && str[i+1] != 
> ',') {
> +                             fprintf(stderr, "input param list is not well 
> formatted\n");
> +                             return 0;
> +                     }
> +                     #ifdef RTE_LIB_GPUDEV
> +                             gpu_mbuf = 1;
> +                     #else
> +                             fprintf(stderr, "gpudev library not built. 
> Can't create mempools in GPU memory.\n");
> +                             return 0;
> +                     #endif

While I suppose it makes things more readable, the alignment here with the
ifdefs is unusual. The standard in DPDK is that #ifdefs always start at the
margin. If you want things inline like this, I suggest you convert the
macro to a global variable at the top of the file and use that in regular C
if conditions.

> +                     continue;
> +             }
>               if (c != ',') {
>                       fprintf(stderr, "character %c is not a decimal 
> digit\n", c);
>                       return 0;
> @@ -3645,6 +3664,8 @@ parse_item_list(const char *str, const char *item_name, 
> unsigned int max_items,
>                       parsed_items[nb_item] = value;
>                       value_ok = 0;
>                       value = 0;
> +                     mbuf_mem_types[nb_item] = gpu_mbuf ? MBUF_MEM_GPU : 
> MBUF_MEM_CPU;
> +                     gpu_mbuf = 0;
>               }
>               nb_item++;
>       }
> @@ -3653,12 +3674,15 @@ parse_item_list(const char *str, const char 
> *item_name, unsigned int max_items,
>                       item_name, nb_item + 1, max_items);
>               return 0;
>       }
> +
> +     mbuf_mem_types[nb_item] = gpu_mbuf ? MBUF_MEM_GPU : MBUF_MEM_CPU;
> +
>       parsed_items[nb_item++] = value;
>       if (! check_unique_values)
>               return nb_item;
>  
>       /*
> -      * Then, check that all values in the list are different.
> +      * Then, check that all values in the list are differents.

Original comment is correct. Remove this change from diff.

>        * No optimization here...
>        */
>       for (i = 0; i < nb_item; i++) {
> @@ -6874,6 +6898,16 @@ static void cmd_set_fwd_mode_parsed(void 
> *parsed_result,
>                                   __rte_unused void *data)
>  {
>       struct cmd_set_fwd_mode_result *res = parsed_result;
> +     int idx;
> +
> +     for (idx = 0; idx < MAX_SEGS_BUFFER_SPLIT; idx++) {
> +             if (mbuf_mem_types[idx] == MBUF_MEM_GPU &&
> +                             strcmp(res->mode, "io") != 0) {
> +                     TESTPMD_LOG(ERR,
> +                                     "GPU memory mbufs can be used with 
> iofwd engine only\n");

No point in wrapping like this, as you end up at the same column anyway.

> +                     return;
> +             }
> +     }
>  
>       retry_enabled = 0;
>       set_pkt_forwarding_mode(res->mode);
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index 6fca09527b..9e8f8f1462 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -2966,7 +2966,7 @@ port_rss_reta_info(portid_t port_id,
>  }
>  
>  /*
> - * Displays the RSS hash functions of a port, and, optionally, the RSS hash
> + * Displays the RSS hash functions of a port, and, optionaly, the RSS hash

Original comment spelling is correct. Drop from diff.

>   * key of the port.
>   */
>  void
> @@ -5255,7 +5255,7 @@ mcast_addr_pool_remove(struct rte_port *port, uint32_t 
> addr_idx)
>  {
>       port->mc_addr_nb--;
>       if (addr_idx == port->mc_addr_nb) {
> -             /* No need to recompact the set of multicast addresses. */
> +             /* No need to recompact the set of multicast addressses. */
>               if (port->mc_addr_nb == 0) {
>                       /* free the pool of multicast addresses. */
>                       free(port->mc_addr_pool);
> diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c
> index 99c94cb282..3a85ec3dd1 100644
> --- a/app/test-pmd/icmpecho.c
> +++ b/app/test-pmd/icmpecho.c
> @@ -53,7 +53,7 @@ arp_op_name(uint16_t arp_op)
>       default:
>               break;
>       }
> -     return "Unknown ARP op";
> +     return "Unkwown ARP op";

Introducing typo here too. Drop from diff.

>  }
>  
>  static const char *
> diff --git a/app/test-pmd/meson.build b/app/test-pmd/meson.build
> index 43130c8856..568660e18f 100644
> --- a/app/test-pmd/meson.build
> +++ b/app/test-pmd/meson.build
> @@ -43,6 +43,9 @@ if dpdk_conf.has('RTE_LIB_BPF')
>      sources += files('bpf_cmd.c')
>      deps += 'bpf'
>  endif
> +if dpdk_conf.has('RTE_LIB_GPUDEV')
> +    deps += 'gpudev'
> +endif
>  if dpdk_conf.has('RTE_LIB_GRO')
>      deps += 'gro'
>  endif
> diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
> index f9185065af..dde5ef6590 100644
> --- a/app/test-pmd/parameters.c
> +++ b/app/test-pmd/parameters.c
> @@ -86,7 +86,10 @@ usage(char* progname)
>              "in NUMA mode.\n");
>       printf("  --mbuf-size=N,[N1[,..Nn]: set the data size of mbuf to "
>              "N bytes. If multiple numbers are specified the extra pools "
> -            "will be created to receive with packet split features\n");
> +            "will be created to receive with packet split features\n"
> +                "Use 'g' suffix for GPU memory.\n"
> +                "If no or an unrecognized suffix is provided, CPU is 
> assumed\n");

Indentation looks wrong. Also, "g" is a bad suffix to append to a size,
since it would indicate "gigabyte" in most size-based values.

> +
>       printf("  --total-num-mbufs=N: set the number of mbufs to be allocated "
>              "in mbuf pools.\n");
>       printf("  --max-pkt-len=N: set the maximum size of packet to N 
> bytes.\n");
> @@ -594,6 +597,7 @@ launch_args_parse(int argc, char** argv)
>       struct rte_eth_dev_info dev_info;
>       uint16_t rec_nb_pkts;
>       int ret;
> +     uint32_t idx = 0;
>  
>       static struct option lgopts[] = {
>               { "help",                       0, 0, 0 },
> @@ -1543,4 +1547,13 @@ launch_args_parse(int argc, char** argv)
>                                 "ignored\n");
>               mempool_flags = 0;
>       }
> +
> +     for (idx = 0; idx < mbuf_data_size_n; idx++) {
> +             if (mbuf_mem_types[idx] == MBUF_MEM_GPU &&
> +                             strcmp(cur_fwd_eng->fwd_mode_name, "io") != 0) {
> +                     TESTPMD_LOG(ERR, 
> +                                     "GPU memory mbufs can be used with 
> iofwd engine only\n");

Again, no need to wrap here.

> +                     rte_exit(EXIT_FAILURE, "Command line is incorrect\n");
> +             }
> +     }
>  }
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index ba65342b6d..b6c55e61b1 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -206,6 +206,13 @@ uint32_t mbuf_data_size_n = 1; /* Number of specified 
> mbuf sizes. */
>  uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
>       DEFAULT_MBUF_DATA_SIZE
>  }; /**< Mbuf data space size. */
> +
> +/* Mbuf memory types. */
> +enum mbuf_mem_type mbuf_mem_types[MAX_SEGS_BUFFER_SPLIT];
> +/* Pointers to external memory allocated for mempools. */
> +uintptr_t mempools_ext_ptr[MAX_SEGS_BUFFER_SPLIT];
> +size_t mempools_ext_size[MAX_SEGS_BUFFER_SPLIT];
> +
>  uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
>                                        * specified on command-line. */
>  uint16_t stats_period; /**< Period to show statistics (disabled by default) 
> */
> @@ -546,6 +553,12 @@ int proc_id;
>   */
>  unsigned int num_procs = 1;
>  
> +/*
> + * In case of GPU memory external mbufs use, for simplicity,
> + * the first GPU device in the list.
> + */
> +int gpu_id = 0;
> +
>  static void
>  eth_rx_metadata_negotiate_mp(uint16_t port_id)
>  {
> @@ -1108,6 +1121,94 @@ setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, 
> unsigned int socket_id,
>       return ext_num;
>  }
>  
> +static struct rte_mempool *
> +gpu_mbuf_pool_create(__rte_unused uint16_t mbuf_seg_size,
> +             __rte_unused unsigned int nb_mbuf,
> +             __rte_unused unsigned int socket_id,
> +             __rte_unused uint16_t port_id,
> +             __rte_unused int gpu_id,
> +             __rte_unused uintptr_t *mp_addr,
> +            __rte_unused size_t *mp_size)
> +{
> +#ifdef RTE_LIB_GPUDEV
> +     int ret = 0;
> +     char pool_name[RTE_MEMPOOL_NAMESIZE];
> +     struct rte_eth_dev_info dev_info;
> +     struct rte_mempool *rte_mp = NULL;
> +     struct rte_pktmbuf_extmem gpu_mem;
> +     struct rte_gpu_info ginfo;
> +     uint8_t gpu_page_shift = 16;
> +     uint32_t gpu_page_size = (1UL << gpu_page_shift);
> +
> +     if (rte_gpu_count_avail() == 0)
> +             rte_exit(EXIT_FAILURE, "No GPU device available.\n");
> +
> +     if (rte_gpu_info_get(gpu_id, &ginfo))
> +             rte_exit(EXIT_FAILURE,
> +                             "Can't retrieve info about GPU %d - bye\n", 
> gpu_id);
> +
> +     ret = eth_dev_info_get_print_err(port_id, &dev_info);
> +     if (ret != 0)
> +             rte_exit(EXIT_FAILURE,
> +                     "Failed to get device info for port %d\n",
> +                     port_id);
> +
> +     mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), port_id, 
> MBUF_MEM_GPU);
> +     if (!is_proc_primary()) {
> +             rte_mp = rte_mempool_lookup(pool_name);
> +             if (rte_mp == NULL)
> +                     rte_exit(EXIT_FAILURE,
> +                             "Get mbuf pool for socket %u failed: %s\n",
> +                             socket_id, rte_strerror(rte_errno));
> +             return rte_mp;
> +     }
> +
> +     TESTPMD_LOG(INFO,
> +             "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u GPU 
> device=%s\n",
> +             pool_name, nb_mbuf, mbuf_seg_size, socket_id, ginfo.name);
> +
> +     /* Create an external memory mempool using memory allocated on the GPU. 
> */
> +
> +     gpu_mem.elt_size = RTE_MBUF_DEFAULT_BUF_SIZE;
> +     gpu_mem.buf_len = RTE_ALIGN_CEIL(nb_mbuf * gpu_mem.elt_size, 
> gpu_page_size);
> +     gpu_mem.buf_iova = RTE_BAD_IOVA;
> +
> +     gpu_mem.buf_ptr = rte_gpu_mem_alloc(gpu_id, gpu_mem.buf_len);
> +     if (gpu_mem.buf_ptr == NULL)
> +             rte_exit(EXIT_FAILURE, "Could not allocate GPU device 
> memory\n");
> +
> +     ret = rte_extmem_register(gpu_mem.buf_ptr, gpu_mem.buf_len,
> +                     NULL, gpu_mem.buf_iova, gpu_page_size);
> +     if (ret)
> +             rte_exit(EXIT_FAILURE, "Unable to register addr 0x%p, ret 
> %d\n", gpu_mem.buf_ptr, ret);
> +
> +     uint16_t pid = 0;
> +
> +     RTE_ETH_FOREACH_DEV(pid)
> +     {
> +             ret = rte_dev_dma_map(dev_info.device, gpu_mem.buf_ptr,
> +                                       gpu_mem.buf_iova, gpu_mem.buf_len);
> +             if (ret)
> +                     rte_exit(EXIT_FAILURE, "Unable to DMA map addr 0x%p for 
> device %s\n",
> +                                      gpu_mem.buf_ptr, 
> dev_info.device->name);
> +     }
> +
> +     rte_mp = rte_pktmbuf_pool_create_extbuf(pool_name, nb_mbuf,
> +                     mb_mempool_cache, 0, mbuf_seg_size, socket_id, 
> &gpu_mem, 1);
> +     if (rte_mp == NULL)
> +             rte_exit(EXIT_FAILURE,
> +                             "Creation of GPU mempool <%s> failed\n", 
> pool_name);
> +
> +     *mp_addr = (uintptr_t) gpu_mem.buf_ptr;
> +     *mp_size = (size_t) gpu_mem.buf_len;
> +
> +     return rte_mp;
> +#else
> +     rte_exit(EXIT_FAILURE,
> +                     "gpudev library not built. Can't create mempools in GPU 
> memory.\n");
> +#endif
> +}
> +
>  /*
>   * Configuration initialisation done once at init time.
>   */
> @@ -1122,7 +1223,7 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned 
> nb_mbuf,
>  
>       mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
>  #endif
> -     mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
> +     mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx, 
> MBUF_MEM_CPU);
>       if (!is_proc_primary()) {
>               rte_mp = rte_mempool_lookup(pool_name);
>               if (rte_mp == NULL)
> @@ -1709,19 +1810,42 @@ init_config(void)
>  
>               for (i = 0; i < num_sockets; i++)
>                       for (j = 0; j < mbuf_data_size_n; j++)
> -                             mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
> -                                     mbuf_pool_create(mbuf_data_size[j],
> -                                                       nb_mbuf_per_pool,
> -                                                       socket_ids[i], j);
> +                     {

brace should be on the same line as the "for". Given that "if - else"
counts as one statement, the braces may not need to be added at all.

> +                             if (mbuf_mem_types[j] == MBUF_MEM_GPU) {
> +                                     mempools[i * MAX_SEGS_BUFFER_SPLIT + j] 
> =
> +                                             
> gpu_mbuf_pool_create(mbuf_data_size[j],
> +                                                             
> nb_mbuf_per_pool,
> +                                                             socket_ids[i], 
> j, gpu_id,
> +                                                             
> &(mempools_ext_ptr[i]),
> +                                                             
> &(mempools_ext_size[i]));
> +                             } else {
> +                                     mempools[i * MAX_SEGS_BUFFER_SPLIT + j] 
> =
> +                                             
> mbuf_pool_create(mbuf_data_size[j],
> +                                                             
> nb_mbuf_per_pool,
> +                                                             socket_ids[i], 
> j);
> +                             }
> +                     }
>       } else {
>               uint8_t i;
>  
>               for (i = 0; i < mbuf_data_size_n; i++)
> -                     mempools[i] = mbuf_pool_create
> -                                     (mbuf_data_size[i],
> -                                      nb_mbuf_per_pool,
> -                                      socket_num == UMA_NO_CONFIG ?
> -                                      0 : socket_num, i);
> +             {
> +                     if (mbuf_mem_types[i] == MBUF_MEM_GPU) {
> +                             mempools[i] =
> +                                     gpu_mbuf_pool_create(mbuf_data_size[i],
> +                                             nb_mbuf_per_pool,
> +                                             socket_num == UMA_NO_CONFIG ? 0 
> : socket_num,
> +                                             i, gpu_id,
> +                                             &(mempools_ext_ptr[i]),
> +                                             &(mempools_ext_size[i]));
> +                     } else {
> +                             mempools[i] = 
> mbuf_pool_create(mbuf_data_size[i],
> +                                                     nb_mbuf_per_pool,
> +                                                     socket_num == 
> UMA_NO_CONFIG ?
> +                                                     0 : socket_num, i);
> +                     }
> +             }
> +
>       }
>  
>       init_port_config();
> @@ -3434,9 +3558,45 @@ pmd_test_exit(void)
>                       return;
>               }
>       }
> +
>       for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
> -             if (mempools[i])
> +             if (mempools[i]) {
>                       mempool_free_mp(mempools[i]);
> +#ifdef RTE_LIB_GPUDEV
> +                     if (mbuf_mem_types[i] == MBUF_MEM_GPU) {
> +                             if (mempools_ext_ptr[i] != 0) {
> +                                     ret = rte_extmem_unregister(
> +                                                     (void 
> *)mempools_ext_ptr[i],
> +                                                     mempools_ext_size[i]);
> +
> +                                     if (ret)
> +                                             RTE_LOG(ERR, EAL,
> +                                                             
> "rte_extmem_unregister 0x%p -> %d (rte_errno = %d)\n",
> +                                                             (uint8_t 
> *)mempools_ext_ptr[i], ret, rte_errno);
> +
> +                                     RTE_ETH_FOREACH_DEV(pt_id) {
> +                                             struct rte_eth_dev_info 
> dev_info;
> +                                             ret = 
> eth_dev_info_get_print_err(pt_id, &dev_info);
> +                                             if (ret != 0)
> +                                                     rte_exit(EXIT_FAILURE,
> +                                                             "Failed to get 
> device info for port %d\n",
> +                                                             pt_id);
> +
> +                                             ret = 
> rte_dev_dma_unmap(dev_info.device,
> +                                                             (void 
> *)mempools_ext_ptr[i], RTE_BAD_IOVA,
> +                                                             
> mempools_ext_size[i]);
> +
> +                                             if (ret)
> +                                                     RTE_LOG(ERR, EAL,
> +                                                                     
> "rte_dev_dma_unmap 0x%p -> %d (rte_errno = %d)\n",
> +                                                                     
> (uint8_t *)mempools_ext_ptr[i], ret, rte_errno);
> +                                     }
> +
> +                                     rte_gpu_mem_free(gpu_id, (void 
> *)mempools_ext_ptr[i]);
> +                             }
> +                     }
> +#endif

This block has very large amounts of indentation and nesting and should be
reworked to reduce this. Aim for 4 or 5 levels of indent max.

> +             }
>       }
>       free(xstats_display);
>  
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index b1dfd097c7..eca5b041d3 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -9,6 +9,9 @@
>  
>  #include <rte_pci.h>
>  #include <rte_bus_pci.h>
> +#ifdef RTE_LIB_GPUDEV
> +#include <rte_gpudev.h>
> +#endif
>  #ifdef RTE_LIB_GRO
>  #include <rte_gro.h>
>  #endif
> @@ -484,6 +487,11 @@ extern uint8_t dcb_config;
>  extern uint32_t mbuf_data_size_n;
>  extern uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT];
>  /**< Mbuf data space size. */
> +enum mbuf_mem_type {
> +     MBUF_MEM_CPU,
> +     MBUF_MEM_GPU
> +};
> +extern enum mbuf_mem_type mbuf_mem_types[MAX_SEGS_BUFFER_SPLIT];
>  extern uint32_t param_total_num_mbufs;
>  
>  extern uint16_t stats_period;
> @@ -731,14 +739,16 @@ current_fwd_lcore(void)
>  /* Mbuf Pools */
>  static inline void
>  mbuf_poolname_build(unsigned int sock_id, char *mp_name,
> -                 int name_size, uint16_t idx)
> +                 int name_size, uint16_t idx, enum mbuf_mem_type mem_type)
>  {
> +     const char *suffix = mem_type == MBUF_MEM_GPU ? "_gpu" : "";
> +
>       if (!idx)
>               snprintf(mp_name, name_size,
> -                      MBUF_POOL_NAME_PFX "_%u", sock_id);
> +                      MBUF_POOL_NAME_PFX "_%u%s", sock_id, suffix);
>       else
>               snprintf(mp_name, name_size,
> -                      MBUF_POOL_NAME_PFX "_%hu_%hu", (uint16_t)sock_id, idx);
> +                      MBUF_POOL_NAME_PFX "_%hu_%hu%s", (uint16_t)sock_id, 
> idx, suffix);
>  }
>  
>  static inline struct rte_mempool *
> @@ -746,7 +756,7 @@ mbuf_pool_find(unsigned int sock_id, uint16_t idx)
>  {
>       char pool_name[RTE_MEMPOOL_NAMESIZE];
>  
> -     mbuf_poolname_build(sock_id, pool_name, sizeof(pool_name), idx);
> +     mbuf_poolname_build(sock_id, pool_name, sizeof(pool_name), idx, 
> mbuf_mem_types[idx]);
>       return rte_mempool_lookup((const char *)pool_name);
>  }
>  
> -- 
> 2.17.1
> 

Reply via email to