Hello Ophir,

Good thing someone is looking into this.
Thanks.

I have a few comments.


This commitlog is a bit compact.
Separating it with some empty lines would help digest it.


On Wed, May 3, 2023 at 9:27 AM Ophir Munk <ophi...@nvidia.com> wrote:
>
> In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> coded as 2560.
> For applications requiring different values of this
> parameter – it is more convenient to set the max value via an rte API -
> rather than changing the dpdk source code per application.  In many
> organizations, the possibility to compile a private DPDK library for a
> particular application does not exist at all.  With this option there is
> no need to recompile DPDK and it allows using an in-box packaged DPDK.
> An example usage for updating the RTE_MAX_MEMZONE would be of an
> application that uses the DPDK mempool library which is based on DPDK
> memzone library.  The application may need to create a number of
> steering tables, each of which will require its own mempool allocation.
> This commit is not about how to optimize the application usage of
> mempool nor about how to improve the mempool implementation based on
> memzone.  It is about how to make the max memzone definition - run-time
> customized.

The code was using a rather short name "RTE_MAX_MEMZONE".
But I prefer we spell this as "max memzones count" (or a better
wording), in the descriptions/comments.


> This commit adds an API which must be called before rte_eal_init():
> rte_memzone_max_set(int max).  If not called, the default memzone

Afaics, this prototype got unaligned with the patch content, as a
size_t is now taken as input.
You can simply mention rte_memzone_max_set().


> (RTE_MAX_MEMZONE) is used.  There is also an API to query the effective

After the patch RTE_MAX_MEMZONE does not exist anymore.


> max memzone: rte_memzone_max_get().
>
> Signed-off-by: Ophir Munk <ophi...@nvidia.com>


A global comment on the patch:

rte_calloc provides what you want in all cases below: an array of objects.
I prefer rte_calloc(count, sizeof elem) to rte_zmalloc(count * sizeof elem).

This also avoids a temporary variable to compute the total size of
such an array.


> ---
>  app/test/test_func_reentrancy.c     |  2 +-
>  app/test/test_malloc_perf.c         |  2 +-
>  app/test/test_memzone.c             | 43 
> ++++++++++++++++++++++++-------------
>  config/rte_config.h                 |  1 -
>  drivers/net/qede/base/bcm_osal.c    | 30 ++++++++++++++++++++------
>  drivers/net/qede/base/bcm_osal.h    |  3 +++
>  drivers/net/qede/qede_main.c        |  7 ++++++
>  lib/eal/common/eal_common_memzone.c | 28 +++++++++++++++++++++---
>  lib/eal/include/rte_memzone.h       | 20 +++++++++++++++++
>  lib/eal/version.map                 |  4 ++++
>  10 files changed, 112 insertions(+), 28 deletions(-)
>
> diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
> index d1ed5d4..ae9de6f 100644
> --- a/app/test/test_func_reentrancy.c
> +++ b/app/test/test_func_reentrancy.c
> @@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
>  #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
>  #define MEMPOOL_SIZE                        (4)
>
> -#define MAX_LCORES     (RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
> +#define MAX_LCORES     (rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
>
>  static uint32_t obj_count;
>  static uint32_t synchro;
> diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
> index ccec43a..9bd1662 100644
> --- a/app/test/test_malloc_perf.c
> +++ b/app/test/test_malloc_perf.c
> @@ -165,7 +165,7 @@ test_malloc_perf(void)
>                 return -1;
>
>         if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, 
> memzone_free,
> -                       NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
> +                       NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
>                 return -1;
>
>         return 0;
> diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
> index c9255e5..36b9790 100644
> --- a/app/test/test_memzone.c
> +++ b/app/test/test_memzone.c
> @@ -871,9 +871,18 @@ test_memzone_bounded(void)
>  static int
>  test_memzone_free(void)
>  {
> -       const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
> +       size_t mz_size;
> +       const struct rte_memzone **mz;
>         int i;
>         char name[20];
> +       int rc = -1;
> +
> +       mz_size = (rte_memzone_max_get() + 1) * sizeof(struct rte_memzone *);
> +       mz = rte_zmalloc("memzone_test", mz_size, 0);
> +       if (!mz) {
> +               printf("Fail allocating memzone test array\n");
> +               return rc;
> +       }
>
>         mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0"), 2000,
>                         SOCKET_ID_ANY, 0);
> @@ -881,42 +890,42 @@ test_memzone_free(void)
>                         SOCKET_ID_ANY, 0);
>
>         if (mz[0] > mz[1])
> -               return -1;
> +               goto exit_test;
>         if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0")))
> -               return -1;
> +               goto exit_test;
>         if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1")))
> -               return -1;
> +               goto exit_test;
>
>         if (rte_memzone_free(mz[0])) {
>                 printf("Fail memzone free - tempzone0\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0"))) {
>                 printf("Found previously free memzone - tempzone0\n");
> -               return -1;
> +               goto exit_test;
>         }
>         mz[2] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone2"), 2000,
>                         SOCKET_ID_ANY, 0);
>
>         if (mz[2] > mz[1]) {
>                 printf("tempzone2 should have gotten the free entry from 
> tempzone0\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_free(mz[2])) {
>                 printf("Fail memzone free - tempzone2\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone2"))) {
>                 printf("Found previously free memzone - tempzone2\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_free(mz[1])) {
>                 printf("Fail memzone free - tempzone1\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1"))) {
>                 printf("Found previously free memzone - tempzone1\n");
> -               return -1;
> +               goto exit_test;
>         }
>
>         i = 0;
> @@ -928,7 +937,7 @@ test_memzone_free(void)
>
>         if (rte_memzone_free(mz[0])) {
>                 printf("Fail memzone free - tempzone0\n");
> -               return -1;
> +               goto exit_test;
>         }
>         mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0new"), 0,
>                         SOCKET_ID_ANY, 0);
> @@ -936,17 +945,21 @@ test_memzone_free(void)
>         if (mz[0] == NULL) {
>                 printf("Fail to create memzone - tempzone0new - when MAX 
> memzones were "
>                                 "created and one was free\n");
> -               return -1;
> +               goto exit_test;
>         }
>
>         for (i = i - 2; i >= 0; i--) {
>                 if (rte_memzone_free(mz[i])) {
>                         printf("Fail memzone free - tempzone%d\n", i);
> -                       return -1;
> +                       goto exit_test;
>                 }
>         }
>
> -       return 0;
> +       rc = 0;
> +
> +exit_test:
> +       rte_free(mz);
> +       return rc;
>  }
>
>  static int test_memzones_left;
> diff --git a/config/rte_config.h b/config/rte_config.h
> index 7b8c85e..400e44e 100644
> --- a/config/rte_config.h
> +++ b/config/rte_config.h
> @@ -34,7 +34,6 @@
>  #define RTE_MAX_MEM_MB_PER_LIST 32768
>  #define RTE_MAX_MEMSEG_PER_TYPE 32768
>  #define RTE_MAX_MEM_MB_PER_TYPE 65536
> -#define RTE_MAX_MEMZONE 2560
>  #define RTE_MAX_TAILQ 32
>  #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
>  #define RTE_MAX_VFIO_CONTAINERS 64
> diff --git a/drivers/net/qede/base/bcm_osal.c 
> b/drivers/net/qede/base/bcm_osal.c
> index 2c59397..0458768 100644
> --- a/drivers/net/qede/base/bcm_osal.c
> +++ b/drivers/net/qede/base/bcm_osal.c
> @@ -47,10 +47,26 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
>  }
>
>  /* Array of memzone pointers */
> -static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
> +static const struct rte_memzone **ecore_mz_mapping;
>  /* Counter to track current memzone allocated */
>  static uint16_t ecore_mz_count;
>
> +int ecore_mz_mapping_alloc(void)
> +{
> +       ecore_mz_mapping = rte_zmalloc("ecore_mz_map",
> +               rte_memzone_max_get() * sizeof(struct rte_memzone *), 0);

I think we should allocate only for the first call and we are missing
some refcount.


> +
> +       if (!ecore_mz_mapping)
> +               return -ENOMEM;
> +
> +       return 0;
> +}
> +
> +void ecore_mz_mapping_free(void)
> +{
> +       rte_free(ecore_mz_mapping);

Won't we release this array while another qed device is still in use?


> +}
> +
>  unsigned long qede_log2_align(unsigned long n)
>  {
>         unsigned long ret = n ? 1 : 0;
> @@ -132,9 +148,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
>         uint32_t core_id = rte_lcore_id();
>         unsigned int socket_id;
>
> -       if (ecore_mz_count >= RTE_MAX_MEMZONE) {
> -               DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
> -                      RTE_MAX_MEMZONE);
> +       if (ecore_mz_count >= rte_memzone_max_get()) {
> +               DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
> +                      rte_memzone_max_get());
>                 *phys = 0;
>                 return OSAL_NULL;
>         }
> @@ -171,9 +187,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev 
> *p_dev,
>         uint32_t core_id = rte_lcore_id();
>         unsigned int socket_id;
>
> -       if (ecore_mz_count >= RTE_MAX_MEMZONE) {
> -               DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
> -                      RTE_MAX_MEMZONE);
> +       if (ecore_mz_count >= rte_memzone_max_get()) {
> +               DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
> +                      rte_memzone_max_get());
>                 *phys = 0;
>                 return OSAL_NULL;
>         }
> diff --git a/drivers/net/qede/base/bcm_osal.h 
> b/drivers/net/qede/base/bcm_osal.h
> index 67e7f75..97e261d 100644
> --- a/drivers/net/qede/base/bcm_osal.h
> +++ b/drivers/net/qede/base/bcm_osal.h
> @@ -477,4 +477,7 @@ enum dbg_status     qed_dbg_alloc_user_data(struct 
> ecore_hwfn *p_hwfn,
>         qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
>  #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
>
> +int ecore_mz_mapping_alloc(void);
> +void ecore_mz_mapping_free(void);
> +
>  #endif /* __BCM_OSAL_H */
> diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
> index 0303903..fd63262 100644
> --- a/drivers/net/qede/qede_main.c
> +++ b/drivers/net/qede/qede_main.c
> @@ -72,6 +72,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device 
> *pci_dev,
>         hw_prepare_params.allow_mdump = false;
>         hw_prepare_params.b_en_pacing = false;
>         hw_prepare_params.epoch = OSAL_GET_EPOCH(ECORE_LEADING_HWFN(edev));
> +       rc = ecore_mz_mapping_alloc();
> +       if (rc) {
> +               DP_ERR(edev, "mem zones array allocation failed\n");
> +               return rc;
> +       }
> +
>         rc = ecore_hw_prepare(edev, &hw_prepare_params);
>         if (rc) {
>                 DP_ERR(edev, "hw prepare failed\n");
> @@ -722,6 +728,7 @@ static void qed_remove(struct ecore_dev *edev)
>                 return;
>
>         ecore_hw_remove(edev);
> +       ecore_mz_mapping_free();
>  }
>
>  static int qed_send_drv_state(struct ecore_dev *edev, bool active)
> diff --git a/lib/eal/common/eal_common_memzone.c 
> b/lib/eal/common/eal_common_memzone.c
> index a9cd91f..f94330b 100644
> --- a/lib/eal/common/eal_common_memzone.c
> +++ b/lib/eal/common/eal_common_memzone.c
> @@ -22,6 +22,10 @@
>  #include "eal_private.h"
>  #include "eal_memcfg.h"
>
> +#define RTE_DEFAULT_MAX_MEMZONE 2560

No need for a RTE_ prefix for a local macro and ...


> +
> +static size_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;

... in the end, I see no need for the RTE_DEFAULT_MAX_MEMZONE macro at
all, it is only used as the default init value, here.


> +
>  static inline const struct rte_memzone *
>  memzone_lookup_thread_unsafe(const char *name)
>  {
> @@ -81,8 +85,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, 
> size_t len,
>         /* no more room in config */
>         if (arr->count >= arr->len) {
>                 RTE_LOG(ERR, EAL,
> -               "%s(): Number of requested memzone segments exceeds 
> RTE_MAX_MEMZONE\n",
> -                       __func__);
> +               "%s(): Number of requested memzone segments exceeds max "
> +               "memzone segments (%d >= %d)\n",

Won't we always display this log for the case when arr->count == arr->len ?
It is then pointless to display both arr->count and arr->len (which
should be formatted as %u).

I would simply log:
RTE_LOG(ERR, EAL,
        "%s(): Number of requested memzone segments exceeds maximum %u\n",
        __func__, arr->len);


> +                       __func__, arr->count, arr->len);
>                 rte_errno = ENOSPC;
>                 return NULL;
>         }
> @@ -396,7 +401,7 @@ rte_eal_memzone_init(void)
>
>         if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
>                         rte_fbarray_init(&mcfg->memzones, "memzone",
> -                       RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
> +                       rte_memzone_max_get(), sizeof(struct rte_memzone))) {
>                 RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
>                 ret = -1;
>         } else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
> @@ -430,3 +435,20 @@ void rte_memzone_walk(void (*func)(const struct 
> rte_memzone *, void *),
>         }
>         rte_rwlock_read_unlock(&mcfg->mlock);
>  }
> +
> +int
> +rte_memzone_max_set(size_t max)
> +{
> +       /* Setting max memzone must occur befaore calling rte_eal_init() */

before*

This comment would be better placed in the API description than in the
implementation.


> +       if (eal_get_internal_configuration()->init_complete > 0)
> +               return -1;
> +
> +       memzone_max = max;
> +       return 0;
> +}
> +
> +size_t
> +rte_memzone_max_get(void)
> +{
> +       return memzone_max;
> +}
> diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
> index 5302caa..3ff7c73 100644
> --- a/lib/eal/include/rte_memzone.h
> +++ b/lib/eal/include/rte_memzone.h
> @@ -305,6 +305,26 @@ void rte_memzone_dump(FILE *f);
>  void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
>                       void *arg);
>
> +/**

 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice

> + * Set max memzone value
> + *
> + * @param max
> + *   Value of max memzone allocations

I'd rather describe as:
"Maximum number of memzones"

Please also mention that this function can only be called prior to
rte_eal_init().


> + * @return
> + *  0 on success, -1 otherwise
> + */
> +__rte_experimental
> +int rte_memzone_max_set(size_t max);
> +
> +/**

 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice

> + * Get max memzone value

Get the maximum number of memzones.

And we can remind the developer, here, that this value won't change
after rte_eal_init.


> + *
> + * @return
> + *   Value of max memzone allocations
> + */
> +__rte_experimental
> +size_t rte_memzone_max_get(void);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/eal/version.map b/lib/eal/version.map
> index 51a820d..b52a83c 100644
> --- a/lib/eal/version.map
> +++ b/lib/eal/version.map
> @@ -430,6 +430,10 @@ EXPERIMENTAL {
>         rte_thread_create_control;
>         rte_thread_set_name;
>         __rte_eal_trace_generic_blob;
> +
> +       # added in 23.07
> +       rte_memzone_max_set;
> +       rte_memzone_max_get;
>  };
>
>  INTERNAL {
> --
> 2.8.4
>


-- 
David Marchand

Reply via email to