opdl: add the opdl ring infrastructure library

Jerin Jacob Sat, 16 Dec 2017 02:15:16 -0800

-----Original Message-----
> Date: Fri, 15 Dec 2017 11:26:22 +0000
> From: Liang Ma <[email protected]>
> To: [email protected]
> CC: [email protected], [email protected], [email protected],
>  [email protected], [email protected], [email protected],
>  [email protected]
> Subject: [PATCH v2 1/8] event/opdl:  add the opdl ring infrastructure
>  library
> X-Mailer: git-send-email 2.7.5
> 
> OPDL ring is the core infrastructure of OPDL PMD. OPDL ring library
> provide the core data structure and core helper function set. The Ring
> implements a single ring multi-port/stage pipelined packet distribution
> mechanism. This mechanism has the following characteristics:
> 
> • No multiple queue cost, therefore, latency is significant reduced.
> • Fixed dependencies between queue/ports is more suitable for complex.
>   fixed pipelines of stateless packet processing (static pipeline).
> • Has decentralized distribution (no scheduling core).
> • Packets remain in order (no reorder core(s)).
> 
> Signed-off-by: Liang Ma <[email protected]>
> Signed-off-by: Peter, Mccarthy <[email protected]>


1) Invalid Signed-off-by format: there is a stray "," after the first name.


2) There are some compilation issues with the series:
/export/dpdk-next-eventdev/drivers/event/opdl/opdl_evdev_init.c: In
function ‘create_queues_and_rings’: 
/export/dpdk-next-eventdev/drivers/event/opdl/opdl_evdev_init.c:570:17:
error: ‘%s’ directive writing up to 63 bytes into a region of size 32
[-Werror=format-overflow=]
  sprintf(name, "%s_%u", device->service_name, device->nb_opdls);
                 ^~
/export/dpdk-next-eventdev/drivers/event/opdl/opdl_evdev_init.c:570:2: note: 
‘sprintf’ output between 3 and 75 bytes into a destination of size
32
  sprintf(name, "%s_%u", device->service_name, device->nb_opdls);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/export/dpdk-next-eventdev/drivers/event/opdl/opdl_evdev_init.c:570:17: error: 
‘%s’ directive writing up to 63 bytes into a region of size 32
[-Werror=format-overflow=]
  sprintf(name, "%s_%u", device->service_name, device->nb_opdls);
                 ^~
/export/dpdk-next-eventdev/drivers/event/opdl/opdl_evdev_init.c:570:2: note: 
‘sprintf’ output between 3 and 75 bytes into a destination of size 32
  sprintf(name, "%s_%u", device->service_name, device->nb_opdls);

3) Please rebase to the next-eventdev tree. Gage has already added a new capability flag.


> ---
> +
> +# library name
> +LIB = librte_pmd_opdl_event.a
> +
> +# build flags
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +# for older GCC versions, allow us to initialize an event using
> +# designated initializers.
> +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> +ifeq ($(shell test $(GCC_VERSION) -le 50 && echo 1), 1)
> +CFLAGS += -Wno-missing-field-initializers
> +endif
> +endif
> +
> +LDLIBS += -lrte_eal -lrte_eventdev -lrte_kvargs -lrte_ring

Does it have a -lrte_ring dependency?

> +LDLIBS += -lrte_bus_vdev -lrte_mbuf -lrte_mempool
> +
> +# library version
> +LIBABIVER := 1
> +
> +# versioning export map
> +EXPORT_MAP := rte_pmd_evdev_opdl_version.map
> +
> +# library source files
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_evdev.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_evdev_init.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_ring.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_evdev_xstats.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_test.c

Each patch should be buildable on its own. Add the files in the patch that
actually adds their source code.


> +
> +# export include files
> +SYMLINK-y-include +=
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/event/opdl/opdl_ring.c b/drivers/event/opdl/opdl_ring.c
> new file mode 100644
> index 0000000..5120fbe
> --- /dev/null
> +++ b/drivers/event/opdl/opdl_ring.c
> @@ -0,0 +1,1232 @@
> +/*-
> + * <COPYRIGHT_TAG>

??

> + */
> +
> +#include <stdbool.h>
> +#include <stddef.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +
> +#include <rte_branch_prediction.h>
> +#include <rte_debug.h>
> +#include <rte_lcore.h>
> +#include <rte_log.h>
> +#include <rte_malloc.h>
> +#include <rte_memcpy.h>
> +#include <rte_memory.h>
> +#include <rte_memzone.h>
> +#include <rte_eal_memconfig.h>
> +
> +#include "opdl_ring.h"
> +
> +#define LIB_NAME "opdl_ring"
> +
> +#define OPDL_NAME_SIZE 64
> +
> +#define RTE_LOGTYPE_OPDL RTE_LOGTYPE_USER8
> +#define log(level, fmt, ...) \
> +             RTE_LOG(level, OPDL, LIB_NAME": " fmt "\n", ##__VA_ARGS__)
> +
> +#ifdef OPDL_DEBUG
> +#define log_debug(...) log(DEBUG, __VA_ARGS__)
> +#else
> +#define log_debug(...)
> +#endif

For new PMDs, use dynamic logging.


> +
> +#define POWER_OF_2(n) ((n) && !((n) & ((n) - 1)))

I guess this is already available as a standard DPDK macro in the common area.
If not, create a new one there.

> +
> +#define RTE_EVENT_MASK  (0xFFFF0000000FFFFFULL)

Please don't use the RTE_ prefix for PMD-specific macros.

> +
> +/* Types of dependency between stages */
> +enum dep_type {
> +     DEP_NONE = 0,  /* no dependency */
> +     DEP_DIRECT,  /* stage has direct dependency */
> +     DEP_INDIRECT,  /* in-direct dependency through other stage(s) */
> +     DEP_SELF,  /* stage dependency on itself, used to detect loops */
> +};
> +
> +/* Shared section of stage state.
> + * Care is needed when accessing and the layout is important, especially to
> + * limit the adjacent cache-line HW prefetcher from impacting performance.
> + */
> +struct shared_state {
> +     /* Last known minimum sequence number of dependencies, used for multi
> +      * thread operation
> +      */
> +     uint32_t available_seq;
> +     char _pad1[RTE_CACHE_LINE_SIZE * 3];
> +     uint32_t head;  /* Head sequence number (for multi thread operation) */
> +     char _pad2[RTE_CACHE_LINE_SIZE * 3];
> +     struct opdl_stage *stage;  /* back pointer */
> +     uint32_t tail;  /* Tail sequence number */
> +     char _pad3[RTE_CACHE_LINE_SIZE * 2];
> +} __rte_cache_aligned;
> +
> +/* A structure to keep track of "unfinished" claims. This is only used for
> + * stages that are threadsafe. Each lcore accesses its own instance of this
> + * structure to record the entries it has claimed. This allows one lcore to 
> make
> + * multiple claims without being blocked by another. When disclaiming it 
> moves
> + * forward the shared tail when the shared tail matches the tail value 
> recorded
> + * here.
> + */
> +struct claim_manager {
> +     uint32_t num_to_disclaim;
> +     uint32_t num_claimed;
> +     uint32_t mgr_head;
> +     uint32_t mgr_tail;
> +     struct {
> +             uint32_t head;
> +             uint32_t tail;
> +     } claims[OPDL_DISCLAIMS_PER_LCORE];
> +} __rte_cache_aligned;
> +
> +/* Context for each stage of opdl_ring.
> + * Calculations on sequence numbers need to be done with other uint32_t 
> values
> + * so that results are modulus 2^32, and not undefined.
> + */
> +struct opdl_stage {
> +     struct opdl_ring *t;  /* back pointer, set at init */
> +     uint32_t num_slots;  /* Number of slots for entries, set at init */
> +     uint32_t index;  /* ID for this stage, set at init */
> +     bool threadsafe;  /* Set to 1 if this stage supports threadsafe use */
> +     /* Last known min seq number of dependencies for used for single thread
> +      * operation
> +      */
> +     uint32_t available_seq;
> +     uint32_t head;  /* Current head for single-thread operation */
> +     uint32_t shadow_head;  /* Shadow head for single-thread operation */
> +     uint32_t nb_instance;  /* Number of instances */
> +     uint32_t instance_id;  /* ID of this stage instance */
> +     uint16_t num_claimed;  /* Number of slots claimed */
> +     uint16_t num_event;             /* Number of events */
> +     uint32_t seq;                   /* sequence number  */
> +     uint32_t num_deps;  /* Number of direct dependencies */
> +     /* Keep track of all dependencies, used during init only */
> +     enum dep_type *dep_tracking;
> +     /* Direct dependencies of this stage */
> +     struct shared_state **deps;
> +     /* Other stages read this! */
> +     struct shared_state shared __rte_cache_aligned;
> +     /* For managing disclaims in multi-threaded processing stages */
> +     struct claim_manager pending_disclaims[RTE_MAX_LCORE]
> +                                            __rte_cache_aligned;
> +} __rte_cache_aligned;
> +
> +/* Context for opdl_ring */
> +struct opdl_ring {
> +     char name[OPDL_NAME_SIZE];  /* OPDL queue instance name */
> +     int socket;  /* NUMA socket that memory is allocated on */
> +     uint32_t num_slots;  /* Number of slots for entries */
> +     uint32_t mask;  /* Mask for sequence numbers (num_slots - 1) */
> +     uint32_t slot_size;  /* Size of each slot in bytes */
> +     uint32_t num_stages;  /* Number of stages that have been added */
> +     uint32_t max_num_stages;  /* Max number of stages */
> +     /* Stages indexed by ID */
> +     struct opdl_stage *stages;
> +     /* Memory for storing slot data */
> +     uint8_t slots[0] __rte_cache_aligned;
> +};
> +
> +
> +/* Return input stage of a opdl_ring */
> +static inline struct opdl_stage *__attribute__((always_inline))

Change to __rte_always_inline everywhere in the driver.

> +input_stage(const struct opdl_ring *t)
> +{
> +     return &t->stages[0];
> +}
> +
> +}
> +
> +/* Move head atomically, returning number of entries available to process and
> + * the original value of head. For non-input stages, the claim is recorded
> + * so that the tail can be updated later by opdl_stage_disclaim().
> + */
> +static inline void __attribute__((always_inline))
> +move_head_atomically(struct opdl_stage *s, uint32_t *num_entries,
> +             uint32_t *old_head, bool block, bool claim_func)
> +{
> +     uint32_t orig_num_entries = *num_entries;
> +     uint32_t ret;
> +     struct claim_manager *disclaims = &s->pending_disclaims[rte_lcore_id()];
> +
> +     /* Attempt to disclaim any outstanding claims */
> +     opdl_stage_disclaim_multithread_n(s, disclaims->num_to_disclaim,
> +                     false);
> +
> +     *old_head = __atomic_load_n(&s->shared.head, __ATOMIC_ACQUIRE);

I guess the __atomic builtins were only introduced in gcc 4.7.
Make sure the PMD does not build if __atomic_* is not available.
See how CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD is handled in
mk/toolchain/icc/rte.toolchain-compat.mk


> diff --git a/drivers/event/opdl/opdl_ring.h b/drivers/event/opdl/opdl_ring.h
> new file mode 100644
> index 0000000..cc37bd1
> --- /dev/null
> +++ b/drivers/event/opdl/opdl_ring.h
> @@ -0,0 +1,601 @@
> +/*-te
> + *
> + * <COPYRIGHT_TAG>

??

> + */
> +
> +#ifndef _OPDL_H_
> +#define _OPDL_H_
> +
> +/**
> + * @file
> + * The "opdl_ring" is a data structure that contains a fixed number of slots,
> + * with each slot having the same, but configurable, size. Entries are input
> + * into the opdl_ring by copying into available slots. Once in the opdl_ring,
> + * an entry is processed by a number of stages, with the ordering of stage
> + * processing controlled by making stages dependent on one or more other 
> stages.
> + * An entry is not available for a stage to process until it has been 
> processed
> + * by that stages dependencies. Entries are always made available for
> + * processing in the same order that they were input in to the opdl_ring.
> + * Inputting is considered as a stage that depends on all other stages,
> + * and is also a dependency of all stages.
> + *
> + * Inputting and processing in a stage can support multi-threading. Note that
> + * multi-thread processing can also be done by making stages co-operate e.g. 
> two
> + * stages where one processes the even packets and the other processes odd
> + * packets.
> + *
> + * A opdl_ring can be used as the basis for pipeline based applications. 
> Instead
> + * of each stage in a pipeline dequeueing from a ring, processing and 
> enqueueing
> + * to another ring, it can process entries in-place on the ring. If stages do
> + * not depend on each other, they can run in parallel.
> + *
> + * The opdl_ring works with entries of configurable size, these could be
> + * pointers to mbufs, pointers to mbufs with application specific meta-data,
> + * tasks etc.
> + */
> +
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +
> +#include <rte_eventdev.h>
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#ifndef OPDL_DISCLAIMS_PER_LCORE

Move this configuration to base/config or, even better, expose it as devargs.

> +/** Multi-threaded processing allows one thread to process multiple batches 
> in a
> + * stage, while another thread is processing a single large batch. This 
> number
> + * controls how many non-contiguous batches one stage can process before 
> being
> + * blocked by the other stage.
> + */
> +#define OPDL_DISCLAIMS_PER_LCORE 8
> +#endif
> +
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif  /* _OPDL_H_ */
> diff --git a/drivers/event/opdl/rte_pmd_evdev_opdl_version.map 
> b/drivers/event/opdl/rte_pmd_evdev_opdl_version.map
> new file mode 100644
> index 0000000..5352e7e
> --- /dev/null
> +++ b/drivers/event/opdl/rte_pmd_evdev_opdl_version.map
> @@ -0,0 +1,3 @@
> +DPDK_17.05 {

DPDK_18.02

> +     local: *;
> +};
> -- 
> 2.7.5
> 
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for the sole
> use of the intended recipient(s). Any review or distribution by others is
> strictly prohibited. If you are not the intended recipient, please contact the
> sender and delete all copies.

Please remove such notices from mails sent to public mailing lists.

Re: [dpdk-dev] [PATCH v2 1/8] event/opdl: add the opdl ring infrastructure library

Reply via email to