Hi Andrey,
PSB

> -----Original Message-----
> From: Andrey Vesnovaty <andr...@nvidia.com>
> Sent: Wednesday, September 9, 2020 11:30 PM
> To: dev@dpdk.org
> Subject: [RFC 3/3] sft: introduce API
> 
> Defines RTE SFT APIs for Statefull Flow Table library.
> 
> SFT General description:
> SFT library provides a framework for applications that need to maintain
> context across different packets of the connection.
> Examples for such applications:
> - Next-generation firewalls
> - Intrusion detection/prevention systems (IDS/IPS): Suricata, snort
> - SW/Virtual Switching: OVS
> The goals of the SFT library:
> - Accelerate flow recognition & its context retrieval for further
>   lookaside processing.
> - Enable context-aware flow handling offload.
> 
> Signed-off-by: Andrey Vesnovaty <andr...@nvidia.com>
> ---
>  lib/librte_sft/Makefile            |  28 +
>  lib/librte_sft/meson.build         |   7 +
>  lib/librte_sft/rte_sft.c           |   9 +
>  lib/librte_sft/rte_sft.h           | 845 +++++++++++++++++++++++++++++
>  lib/librte_sft/rte_sft_driver.h    | 195 +++++++
>  lib/librte_sft/rte_sft_version.map |  21 +
>  6 files changed, 1105 insertions(+)
>  create mode 100644 lib/librte_sft/Makefile
>  create mode 100644 lib/librte_sft/meson.build
>  create mode 100644 lib/librte_sft/rte_sft.c
>  create mode 100644 lib/librte_sft/rte_sft.h
>  create mode 100644 lib/librte_sft/rte_sft_driver.h
>  create mode 100644 lib/librte_sft/rte_sft_version.map
> 
> diff --git a/lib/librte_sft/Makefile b/lib/librte_sft/Makefile
> new file mode 100644
> index 0000000000..23c6eee849
> --- /dev/null
> +++ b/lib/librte_sft/Makefile
> @@ -0,0 +1,28 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright 2020 Mellanox Technologies, Ltd
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +# library name
> +LIB = librte_sft.a
> +
> +# library version
> +LIBABIVER := 1
> +
> +# build flags
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +LDLIBS += -lrte_eal -lrte_mbuf
> +
> +# library source files
> +# all source are stored in SRCS-y
> +SRCS-$(CONFIG_RTE_LIBRTE_REGEXDEV) := rte_sft.c
> +
> +# export include files
> +SYMLINK-$(CONFIG_RTE_LIBRTE_REGEXDEV)-include += rte_sft.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_REGEXDEV)-include += rte_sft_driver.h
> +
> +# versioning export map
> +EXPORT_MAP := rte_sft_version.map
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_sft/meson.build b/lib/librte_sft/meson.build
> new file mode 100644
> index 0000000000..b210e43f29
> --- /dev/null
> +++ b/lib/librte_sft/meson.build
> @@ -0,0 +1,7 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright 2020 Mellanox Technologies, Ltd
> +
> +sources = files('rte_sft.c')
> +headers = files('rte_sft.h',
> +     'rte_sft_driver.h')
> +deps += ['mbuf']
> diff --git a/lib/librte_sft/rte_sft.c b/lib/librte_sft/rte_sft.c
> new file mode 100644
> index 0000000000..f3d3945545
> --- /dev/null
> +++ b/lib/librte_sft/rte_sft.c
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +
> +
> +#include "rte_sft.h"
> +#include "rte_sft_driver.h"
> +
> +/* Placeholder for RTE SFT library APIs implementation */
> diff --git a/lib/librte_sft/rte_sft.h b/lib/librte_sft/rte_sft.h
> new file mode 100644
> index 0000000000..5c9f92ea9f
> --- /dev/null
> +++ b/lib/librte_sft/rte_sft.h
> @@ -0,0 +1,845 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +
> +#ifndef _RTE_SFT_H_
> +#define _RTE_SFT_H_
> +
> +/**
> + * @file
> + *
> + * RTE SFT API
> + *
> + * Defines RTE SFT APIs for Statefull Flow Table library.
> + *
> + * SFT General description:
> + * SFT library provides a framework for applications that need to maintain
> + * context across different packets of the connection.
> + * Examples for such applications:
> + * - Next-generation firewalls
> + * - Intrusion detection/prevention systems (IDS/IPS): Suricata, Snort
> + * - SW/Virtual Switching: OVS
> + * The goals of the SFT library:
> + * - Accelerate flow recognition & its context retrieval for further 
> lookaside
> + *   processing.
> + * - Enable context-aware flow handling offload.
> + *
> + * Definitions and Abbreviations:
> + * - 5-tuple: defined by:
> + *     -- Source IP address
> + *     -- Source port
> + *     -- Destination IP address
> + *     -- Destination port
> + *     -- IP protocol number
> + * - 7-tuple: 5-tuple zone and port (see struct rte_sft_7tuple)
> + * - 5/7-tuple: 5/7-tuple of the packet from connection initiator
> + * - revers 5/7-tuple: 5/7-tuple of the packet from connection initiate
> + * - application: SFT library API consumer
> + * - APP: see application
> + * - CID: client ID
> + * - CT: connection tracking
> + * - FID: Flow identifier
> + * - FIF: First In Flow
> + * - Flow: defined by 7-tuple and its reverse i.e. flow is bidirectional
> + * - SFT: Stateful Flow Table
> + * - user: see application
> + * - zone: additional user defined value used as differentiator for
> + *         connections having same 5-tuple (for example different VxLan
> + *         connections with same inner 5-tuple).
> + *
> + * SFT components:
> + *
> + * +-----------------------------------+
> + * | RTE flow                          |
> + * |                                   |
> + * | +-------------------------------+ |  +----------------+
> + * | | group X                       | |  | RTE_SFT        |
> + * | |                               | |  |                |
> + * | | +---------------------------+ | |  |                |
> + * | | | rule ...                  | | |  |                |
> + * | | | .                         | | |  +-----------+----+
> + * | | | .                         | | |              |
> + * | | | .                         | | |          entry
> + * | | +---------------------------+ | |            create
> + * | | | rule                      | | |              |
> + * | | |   patterns ...            +---------+        |
> + * | | |   actions                 | | |     |        |
> + * | | |     SFT (zone=Z)          | | |     |        |
> + * | | |     JUMP (group=Y)        | | |  lookup      |
> + * | | +---------------------------+ | |    zone=Z,   |
> + * | | | rule ...                  | | |    5tuple    |
> + * | | | .                         | | |     |        |
> + * | | | .                         | | |  +--v-------------+
> + * | | | .                         | | |  | SFT       |    |
> + * | | |                           | | |  |           |    |
> + * | | +---------------------------+ | |  |        +--v--+ |
> + * | |                               | |  |        |     | |
> + * | +-------------------------------+ |  |        | PMD | |
> + * |                                   |  |        |     | |
> + * |                                   |  |        +-----+ |
> + * | +-------------------------------+ |  |                |
> + * | | group Y                       | |  |                |
> + * | |                               | |  | set flow CTX   |
> + * | | +---------------------------+ | |  |                |
> + * | | | rule                      | | |  +--------+-------+
> + * | | |   patterns                | | |           |
> + * | | |     SFT (state=UNDEFINED) | | |           |
> + * | | |   actions RSS             | | |           |
> + * | | +---------------------------+ | |           |
> + * | | | rule                      | | |           |
> + * | | |   patterns                | | |           |
> + * | | |     SFT (state=INVALID)   | <-------------+
> + * | | |   actions DROP            | | |  forward
> + * | | +---------------------------+ | |    group=Y
> + * | | | rule                      | | |
> + * | | |   patterns                | | |
> + * | | |     SFT (state=ACCEPTED)  | | |
> + * | | |   actions PORT            | | |
> + * | | +---------------------------+ | |
> + * | |  ...                          | |
> + * | |                               | |
> + * | +-------------------------------+ |
> + * |  ...                              |
> + * |                                   |
> + * +-----------------------------------+
> + *
> + * SFT as datastructure:
> + * SFT can be treated as datastructure maintaining flow context across its
> + * lifetime. SFT flow entry represent bidirectional network flow and defined 
> by
> + * 7-tuple & its reverse 7-tuple.
> + * Each entry in SFT has:
> + * - FID: 1:1 mapped & used as entry handle & encapsulating internal
> + *   implementation of the entry.
> + * - State: user-defined value attached to each entry, the only library
> + *   reserved value for state unset (the actual value defined by SFT
> + *   configuration). The application should define flow state encodings and
> + *   set it for flow via rte_sft_flow_set_ctx() than what actions should be
> + *   applied on packets can be defined via related RTE flow rule matching SFT
> + *   state (see rules in SFT components diagram above).
> + * - Timestamp: for the last seen in flow packet used for flow aging
> mechanism
> + *   implementation.
> + * - Client Objects: user-defined flow contexts attached as opaques to flow.
> + * - Acceleration & offloading - utilize RTE flow capabilities, when 
> supported
> + *   (see action ``SFT``), for flow lookup acceleration and further
> + *   context-aware flow handling offload.
> + * - CT state: optionally for TCP connections CT state can be maintained
> + *   (see enum rte_sft_flow_ct_state).
> + * - Out of order TCP packets: optionally SFT can keep out of order TCP
> + *   packets aside the flow context till the arrival of the missing in-order
> + *   packet.
> + *
> + * RTE flow changes:
> + * The SFT flow state (or context) for RTE flow is defined by fields of
> + * struct rte_flow_item_sft.
> + * To utilize SFT capabilities new item and action types introduced:
> + * - item SFT: matching on SFT flow state (see RTE_FLOW_ITEM_TYPE_SFT).
> + * - action SFT: retrieve SFT flow context and attache it to the processed
> + *   packet (see RTE_FLOW_ACTION_TYPE_SFT).
> + *
> + * The contents of per port SFT serving RTE flow action ``SFT`` managed via
> + * SFT PMD APIs (see struct rte_sft_ops).
> + * The SFT flow state/context retrieval performed by user-defined zone 
> ``SFT``
> + * action argument and processed packet 5-tuple.
> + * If in scope of action ``SFT`` there is no context/state for the flow in 
> SFT
> + * undefined sate attached to the packet meaning that the flow is not
> + * recognized by SFT, most probably FIF packet.
> + *
> + * Once the SFT state set for a packet it can match on item SFT
> + * (see RTE_FLOW_ITEM_TYPE_SFT) and forwarding design can be done for
> the
> + * packet, for example:
> + * - if state value == x than queue for further processing by the application
> + * - if state value == y than forward it to eth port (full offload)
> + * - if state value == 'undefined' than queue for further processing by
> + *   the application (handle FIF packets)
> + *
> + * Processing packets with SFT library:
> + *
> + * FIF packet:
> + * To recognize upcoming packets of the SFT flow every FIF packet should be
> + * forwarded to the application utilizing the SFT library. Non-FIF packets 
> can
> + * be processed by the application or its processing can be fully offloaded.
> + * Processing of the packets in SFT library starts with rte_sft_process_mbuf
> + * or rte_sft_process_mbuf_with_zone. If mbuf recognized as FIF application
> + * should make a design to destroy flow or complete flow creation process in
> + * SFT using rte_sft_flow_activate.
> + *
> + * Recognized SFT flow:
> + * Once struct rte_sft_flow_status with valid fid field posesed by 
> application
> + * it can:
> + * - mange client objects on it (see client_obj field in
> + *   struct rte_sft_flow_status) using rte_sft_flow_<OP>_client_obj APIs
> + * - analyze user-defined flow state and CT state (see state & ct_sate fields
> + *   in struct rte_sft_flow_status).
> + * - set flow state to be attached to the upcoming packets by action ``SFT``
> + *   via struct rte_sft_flow_status API.
> + * - decide to destroy flow via rte_sft_flow_destroy API.
> + *
> + * Flow aging:
> + *
> + * SFT library manages the aging for each flow. On flow creation, it's
> + * assigned an aging value, the maximal number of seconds passed since the
> + * last flow packet arrived, once exceeded flow considered aged.
> + * The application notified of aged flow asynchronously via event queues.
> + * The device and port IDs tuple to identify the event queue to enqueue
> + * flow aged events passed on flow creation as arguments
> + * (see rte_sft_flow_activate). It's the application responsibility to
> + * initialize event queues and assign them to each flow for EOF event
> + * notifications.
> + * Aged EOF event handling:
> + * - Should be considered as application responsibility.
> + * - The last stage should be the release of the flow resources via
> + *    rte_sft_flow_destroy API.
> + * - All client objects should be removed from flow before the
> + *   rte_sft_flow_destroy API call.
> + * See the description of rete_sft_flow_destroy for an example of aged flow
> + * handling.
> + *
> + * SFT API thread safety:
> + *
> + * SFT library APIs are thread-safe while handling of specific flow can be
> + * done in a single thread simultaneously. Exclusive access to specific SFT
> + * flow guaranteed by:

The sentence above contradicts itself: if you are working with a single thread,
you can't work simultaneously.
Does the SFT allow access to a single flow from two threads at the same time,
or is it the responsibility of the application to protect itself? I think it
should be the application's responsibility; the SFT should protect itself only
in SFT global functions. For example, calling process_mbuf should be protected,
so the application can call the same function from different threads.
I think we can assume that all packets from a specific flow will arrive at the
same queue and the same thread.

So I don't see the use for the lock API.
 
> + * - rte_sft_process_mbuf
> + * - rte_sft_process_mbuf_with_zone
> + * - rte_sft_flow_create
> + * - rte_sft_flow_lock
> + * When application is done with the flow handling for the current packet it
> + * should call rte_sft_flow_unlock API to maintain exclusive access to the
> + * flow with other threads.
> + *
> + * SFT Library initialization and cleanup:
> + *
> + * SFT library should be considered as a single instance, preconfigured and
> + * initialized via rte_sft_init() API.
> + * SFT library resource deallocation and cleanup should be done via
> + * rte_sft_init() API as a stage of the application termination procedure.
> + */
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_common.h>
> +#include <rte_config.h>
> +#include <rte_errno.h>
> +#include <rte_mbuf.h>
> +#include <rte_ethdev.h>
> +#include <rte_flow.h>
> +
> +/**
> + * L3/L4 5-tuple - src/dest IP and port and IP protocol.
> + *
> + * Used for flow/connection identification.
> + */
> +struct rte_sft_5tuple {
> +     union {
> +             struct {
> +                     rte_be32_t src_addr; /**< IPv4 source address. */
> +                     rte_be32_t dst_addr; /**< IPv4 destination address. */
> +             } ipv4;
> +             struct {
> +                     uint8_t src_addr[16]; /**< IPv6 source address. */
> +                     uint8_t dst_addr[16]; /**< IPv6 destination address. */
> +             } ipv6;
> +     };
> +     uint16_t src_port; /**< Source port. */
> +     uint16_t dst_port; /**< Destination port. */
> +     uint8_t proto; /**< IP protocol. */
> +     uint8_t is_ipv6: 1; /**< True for valid IPv6 fields. Otherwise IPv4. */
> +};
> +
> +/**
> + * Port flow identification.
> + *
> + * @p zone used for setups where 5-tuple is not enough to identify flow.
> + * For example different VLANs/VXLANs may have similar 5-tuples.
> + */
> +struct rte_sft_7tuple {
> +     struct rte_sft_5tuple flow_5tuple; /**< L3/L4 5-tuple. */
> +     uint32_t zone; /**< Zone assigned to flow. */
> +     uint16_t port_id; /** <Port identifier of Ethernet device. */
> +};
> +
> +/**
> + * Flow connection tracking states
> + */
> +enum rte_sft_flow_ct_state {
> +     RTE_SFT_FLOW_CT_STATE_NEW  = (1 << 0),
> +     RTE_SFT_FLOW_CT_STATE_EST  = (1 << 1),
> +     RTE_SFT_FLOW_CT_STATE_REL  = (1 << 2),
> +     RTE_SFT_FLOW_CT_STATE_RPL  = (1 << 3),
> +     RTE_SFT_FLOW_CT_STATE_INV  = (1 << 4),
> +     RTE_SFT_FLOW_CT_STATE_TRK  = (1 << 5),
> +     RTE_SFT_FLOW_CT_STATE_SNAT = (1 << 6),
> +     RTE_SFT_FLOW_CT_STATE_DNAT = (1 << 7),
> +};
> +
> +/**
> + * Structure describes SFT library configuration
> + */
> +struct rte_sft_conf {
> +     uint32_t UDP_aging; /**< UDP proto default aging. */
> +     uint32_t TCP_aging; /**< TCP proto default aging. */
> +     uint32_t TCP_SYN_aging; /**< TCP SYN default aging. */
> +     uint32_t OTHER_aging; /**< All unlisted proto default aging. */
> +     uint32_t size; /**< Max entries in SFT. */
> +     uint8_t undefined_state; /**< Undefined state constant. */
> +     uint8_t reorder_enable: 1;
> +     /**< TCP packet reordering feature enabled bit. */
> +     uint8_t ct_enable: 1; /**< Connection tracking feature enabled bit. */
> +};
> +
> +/**
> + * Structure describes the state of the flow in SFT.
> + */
> +struct rte_sft_flow_status {
> +     uint32_t fid; /**< SFT flow id. */
> +     uint32_t zone; /**< Zone for lookup in SFT */
> +     uint8_t state; /**< Application defined bidirectional flow state. */
> +     uint8_t ct_state; /**< Connection tracking flow state. */
> +     uint32_t age; /**< Seconds passed since last flown packet. */
> +     uint32_t aging;
> +     /**< Flow considered aged once this age (seconds) reached. */
> +     uint32_t nb_in_order_mbufs;
> +     /**< Number of in-order mbufs available for drain */
> +     void **client_obj; /**< Array of clients attached to flow. */
> +     int nb_clients; /**< Number of clients attached to flow. */
> +     uint8_t defined: 1; /**< Flow defined in SFT bit. */
> +     uint8_t activated: 1; /**< Flow activation bit. */
> +     uint8_t fragmented: 1; /**< Last flow mbuf was fragmented. */
> +     uint8_t out_of_order: 1; /**< Last flow mbuf was out of order (TCP). */
> +};
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Get SFT flow status.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * @param fid
> + *   SFT flow ID.
> + * @param[out] status
> + *   Structure to dump actual SFT flow status.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_flow_get_status(const uint32_t fid,
> +                     struct rte_sft_flow_status *status,
> +                     struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Set user defined context.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * Updates per ethernet dev SFT entries:
> + * - flow lookup acceleration
> + * - partial/full flow offloading managed by flow context
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * @param fid
> + *   SFT flow ID.
> + * @param ctx
> + *   User defined state to set.
> + *   Update of *fid* or *zone* fields in struct rte_flow_item_sft 
> unsupported.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success , a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_flow_set_ctx(uint32_t fid,
> +                  const struct rte_flow_item_sft *ctx,
> +                  struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Initialize SFT library instance.
> + *
> + * @param conf
> + *   SFT library instance configuration.
> + *
> + * @return
> + *   0 on success , a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_init(const struct rte_sft_conf *conf);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Finalize SFT library instance.
> + * Cleanup & release allocated resources.
> + */
> +void
> +rte_sft_fini(void);
> +

I think we should use stop. It is not common in DPDK to have fini functions.
Maybe we should also add a start function, so the app can init and then start
the SFT.

> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Process mbuf received on RX queue.
> + *
> + * Fragmentation handling (SFT fragmentation feature configured):
> + * If *mbuf_in* of fragmented packet received it will be stored by SFT 
> library.
> + * status->fragmented bit will be set and *mbuf_out* will be set to NULL.
> + * On reception of all related fragments of IP packet it will be reassembled
> + * and further processed by this function on reception of last fragment.
> + *
Does this function allocate a new mbuf? Does it release all the old mbufs?

> + * Flow definition:
> + * SFT flow defined by one of its 7-tuples, since there is no zone value as
> + * argument flow should be defined by context attached to mbuf with action
> + * ``SFT`` (see RTE flow RTE_FLOW_ACTION_TYPE_SFT). Otherwise status-
> >defined
> + * field will be turned off & *mbuf_out* will be set to *mbuf_in*.
> + * In order to define flow for *mbuf_in* without attached sft context
> + * rte_sft_process_mbuf_with_zone() should be used with *zone* argument
> + * supplied by caller.
> + *
> + * Flow lookup:
> + * If SFT flow identifier can't be retrieved from SFT context attached to
> + * *mbuf_in* by action ``SFT`` - SFT lookup should be performmed by zone,
> + * retrieved from SFT context attached to *mbuf_in*, and 5-tuple, extracted
> + * form mbuf outer header contents.
> + *
> + * Flow defined but does not exists:
> + * If flow not found in SFT inactivated flow will be created in SFT.
> + * status->activated field will be turned off & *mbuf_out* be set to
> *mbuf_in*.
> + * In order to activate created flow rte_sft_flow_activate() should be used
> + * with reverse 7-tuple supplied by caller.
> + * This is first phase of flow creation in SFT for second phase & more 
> detailed
> + * descriotion of flow creation see rte_sft_flow_activate.
> + *
> + * Out of order (SFT out of oreder feature configured):
> + * If flow defined & activated but *mbuf_in* is TCP out of order packet it 
> will
> + * be stored by SFT library. status->out_of_order bit will be set & 
> *mbuf_out*
> + * will be set to NULL. On reception of the first missing in order packet
> + * status->nb_in_order_mbufs will be set to number of mbufs that available
> for
> + * processing with rte_sft_drain_mbuf().
> + *
It is possible that some packets will get trapped in the SFT due to this
feature, if it supports ordering. For example, consider the following case:
Packets arrive at the application. After draining the packets, the
application changes the flow to full offload. This means that
no future packets will arrive at the application.
But until the flow is offloaded, some packets do arrive out of order.
Then the flow is offloaded; this results in a situation where no more
packets will arrive at the application, so some packets will get stuck
in the SFT.
I think we must have some force drain, or a way to notify the SFT that no more
packets should arrive, so that even if the packets are not in order it will
release them.

Also, the same question as with fragmented packets: does this function
allocate new mbufs? Are you releasing the old ones?

> + * Flow defined & activated, mbuf not fragmented and 'in order':
> + * - Flow aging related data (see age field in `struct rte_sft_flow_status`)
> + *   will be updated according to *mbuf_in* timestamp.
> + * - Flow connection tracking state (see ct_state field in
> + *   `struct rte_sft_flow_status`)  will be updated according to *mbuf_in* L4
> + *   header contents.
> + * - *mbuf_out* will be set to last processed mbuf.
> + *
> + * @param[in] mbuf_in
> + *   mbuf to process; mbuf pinter considered 'consumed' and should not be
> used
> + *   after successful call to this function.
> + * @param[out] mbuf_out
> + *   last processed not fragmented and in order mbuf.

If the input mbuf is not fragmented and is in order, will this pointer point
to the input mbuf?

> + * @param[out] status
> + *   Structure to dump SFT flow status once updated according to contents of
> + *   *mbuf_in*.

Are the status bits (for example, fragmented) kept per connection or per flow?
I ask because it is possible to get fragmented packets from both sides.
The same goes for out-of-order packets.


> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success:
> + *   - *mbuf_out* contains valid mbuf pointer, locked SFT flow recognized by
> + *     status->fid.
> + *   - *mbuf_out* is NULL and status->fragmented bit on in case of
> + *     non last fragment *mbuf_in*.
> + *   - *mbuf_out* is NULL and status->out_of_order bit on in case of out of
> + *     order *mbuf_in*, locked SFT flow recognized by status->fid.
> + *   On failure a negative errno value and rte_errno is set.
> + */
> +int
> +rte_sft_process_mbuf(struct rte_mbuf *mbuf_in,
> +                  struct rte_mbuf **mbuf_out,
> +                  struct rte_sft_flow_status *status,
> +                  struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Process mbuf received on RX queue while zone value provided by caller.
> + *
> + * The behaviour of this function is similar to rte_sft_process_mbuf except
> + * the lookup in SFT procedure. The lookup in SFT always done by the *zone*
> + * arg and 5-tuple 5-tuple, extracted form mbuf outer header contents.
> + *
> + * @see rte_sft_process_mbuf
> + *
> + * @param[in] mbuf_in
> + *   mbuf to process; mbuf pinter considered 'consumed' and should not be
> used
> + *   after successful call to this function.
> + * @param[out] mbuf_out
> + *   last processed not fragmented and in order mbuf.
> + * @param[out] status
> + *   Structure to dump SFT flow status once updated according to contents of
> + *   *mbuf_in*.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success:
> + *   - *mbuf_out* contains valid mbuf pointer.
> + *   - *mbuf_out* is NULL and status->fragmented bit on in case of
> + *     non last fragment *mbuf_in*.
> + *   - *mbuf_out* is NULL and status->out_of_order bit on in case of out of
> + *     order *mbuf_in*.
> + *   On failure a negative errno value and rte_errno is set.
> + */
> +int
> +rte_sft_process_mbuf_with_zone(struct rte_mbuf *mbuf_in,
> +                            uint32_t zone,
> +                            struct rte_mbuf **mbuf_out,
> +                            struct rte_sft_flow_status *status,
> +                            struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Drain next in order mbuf.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * This function behaves similar to rte_sft_process_mbuf() but acts on 
> packets
> + * accumulated in SFT flow due to missing in order packet. Processing done on
> + * single mbuf at a time and `in order`. Other than above the behavior is
> + * same as of rte_sft_process_mbuf for flow defined & activated & mbuf isn't
> + * fragmented & 'in order'. This function should be called when
> + * rte_sft_process_mbuf or rte_sft_process_mbuf_with_zone sets
> + * status->nb_in_order_mbufs output param !=0 and until
> + * status->nb_in_order_mbufs == 0.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * @param fid
> + *   SFT flow ID.
> + * @param[out] status
> + *   Structure to dump SFT flow status once updated according to contents of
> + *   *mbuf_in*.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   A valid mbuf in case of success, NULL otherwise and rte_errno is set.
> + */
> +struct rte_mbuf *
> +rte_sft_drain_mbuf(uint32_t fid,
> +                struct rte_sft_flow_status *status,
> +                struct rte_sft_error *error);
> +

Fid represents a connection, so in which direction do we drain the packets?
We can have out-of-order packets from both directions, right?

> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Activate flow in SFT.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * This function performs second phase of flow creation in SFT.
> + * The reasons for 2 phase flow creation procedure:
> + * 1. Missing reverse flow - flow context is shared for both flow directions
> + *    i.e. in order maintain bidirectional flow context in RTE SFT packets
> + *    arriving from both dirrections should be identified as packets of the
> + *    RTE SFT flow. Consequently before creation of the SFT flow caller 
> should
> + *    provide reverse flow direction 7-tuple.
> + * 2. The caller of rte_sft_process_mbuf/rte_sft_process_mbuf_with_zone
> should
> + *   be notified that arrived mbuf is first in flow & decide weather to
> + *   create new flow or it distroy before it was activated with
> + *   rte_sft_flow_destroy.
> + * This function completes creation of the bidirectional SFT flow & creates
> + * entry for 7-tuple on SFT PMD defined by the tuple port for both
> + * initiator/initiate 7-tuples.
> + * Flow aging, connection tracking state & out of order handling will be
> + * initialized according to the content of the *mbuf_in* passes to
> + * rte_sft_process_mbuf/_with_zone during the phase 1 of flow creation.
> + * Once this function returns upcoming calls
> rte_sft_process_mbuf/_with_zone
> + * with 7-tuple or its reverse will return handle to this flow.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * @param fid
> + *   SFT flow ID.
> + * @param reverse_tuple
> + *   Expected response flow 7-tuple.
> + * @param ctx
> + *   User defined state to set.
> + *   Update of *fid* or *zone* fields in struct rte_flow_item_sft 
> unsupported.
> + * @param ct_enable
> + *   Enables maintenance of status->ct_state connection tracking value for 
> the
> + *   flow; otherwise status->ct_state will be initialized with zeros.
> + * @param evdev_id
> + *   Event dev ID to enqueue end of flow event.
> + * @param evport_id
> + *   Event port ID to enqueue end of flow event.
> + * @param[out] status
> + *   Structure to dump SFT flow status once activated.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_flow_activate(uint32_t fid,
> +                   const struct rte_sft_7tuple *reverse_tuple,
> +                   const struct rte_flow_item_sft *ctx,
> +                   uint8_t ct_enable,
> +                   uint8_t dev_id,
> +                   uint8_t port_id,
> +                   struct rte_sft_flow_status *status,
> +                   struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Artificially create SFT flow.
> + *
> + * Function to create SFT flow before reception of the first flow packet.
> + *
> + * @param tuple
> + *   Expected initiator flow 7-tuple.
> + * @param reverse_tuple
> + *   Expected initiate flow 7-tuple.
> + * @param ctx
> + *   User defined state to set.
> + *   Setting of *fid* or *zone* fields in struct rte_flow_item_sft 
> unsupported.
> + * @param ct_enable
> + *   Enables maintenance of status->ct_state connection tracking value for 
> the
> + *   flow; otherwise status->ct_state will be initialized with zeros.
> + * @param[out] status
> + *   Structure to dump SFT flow status once created.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   - on success: 0, locked SFT flow recognized by status->fid.
> + *   - on error: a negative errno value otherwise and rte_errno is set.
> + */
> +
> +int
> +rte_sft_flow_create(const struct rte_sft_7tuple *tuple,
> +                 const struct rte_sft_7tuple *reverse_tuple,
> +                 const struct rte_flow_item_sft *ctx,
> +                 uint8_t ct_enable,
> +                 struct rte_sft_flow_status *status,
> +                 struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Lock exclusively SFT flow.
> + *
> + * Explicit flow locking; used for handling aged flows.
> + *
> + * @param fid
> + *   SFT flow ID.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_flow_lock(uint32_t fid);
> + 
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Release exclusively locked SFT flow.
> + *
> + * When rte_sft_process_mbuf/_with_zone and rte_sft_flow_create
> + * return *status* containing fid with defined bit on the flow considered
> + * exclusively locked and should be unlocked with this function.
> + *
> + * @param fid
> + *   SFT flow ID.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_flow_unlock(uint32_t fid);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Removes flow from SFT.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * - Flow should be locked by caller in order to remove it.
> + * - Flow should have no client objects attached.
> + *
> + * Should be applied on aged flows, when flow aged event received.
> + *
> + * @code{.c}
> + *     while (1) {
> + *         rte_event_dequeue_burst(...);
> + *         FOR_EACH_EV(ev) {
> + *             uint32_t fid = ev.u64;
> + *             rte_sft_flow_lock(fid);
> + *             FOR_EACH_CLIENT(fid, client_id) {
> + *                 rte_sft_flow_reset_client_obj(fid, client_obj);
> + *                 // detached client object handling
> + *             }
> + *             rte_sft_flow_destroy(fid, &error);
> + *         }
> + *     }
> + * @endcode
> + *
> + * @param fid
> + *   SFT flow ID to destroy.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_flow_destroy(uint32_t fid, struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Reset flow age to zero.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * Simulates last flow packet with timestamp set to just now.
> + *
> + * @param fid
> + *   SFT flow ID.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_flow_touch(uint32_t fid, struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Set flow aging to specific value.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * @param fid
> + *   SFT flow ID.
> + * @param aging
> + *   New flow aging value.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_flow_set_aging(uint32_t fid,
> +                    uint32_t aging,
> +                    struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Set client object for given client ID.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * @param fid
> + *   SFT flow ID.
> + * @param client_id
> + *   Client ID to set object for.
> + * @param client_obj
> + *   Pointer to opaque client object structure.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +rte_sft_flow_set_client_obj(uint32_t fid,
> +                         uint8_t client_id,
> +                         void *client_obj,
> +                         struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Get client object for given client ID.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * @param fid
> + *   SFT flow ID.
> + * @param client_id
> + *   Client ID to get object for.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   A valid client object opaque pointer in case of success, NULL otherwise
> + *   and rte_errno is set.
> + */
> +void *
> +rte_sft_flow_get_client_obj(const uint32_t fid,
> +                         uint8_t client_id,
> +                         struct rte_sft_error *error);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Remove client object for given client ID.
> + * Flow should be locked by caller (see rte_sft_flow_lock).
> + *
> + * Detaches client object from SFT flow and returns the ownership for the
> + * client object to the caller by returning client object pointer value.
> + * The pointer returned by this function won't be accessed any more, the
> caller
> + * may release all client obj related resources & the memory allocated for
> + * this client object.
> + *
> + * @param fid
> + *   SFT flow ID.
> + * @param client_id
> + *   Client ID to remove object for.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   A valid client object opaque pointer in case of success, NULL otherwise
> + *   and rte_errno is set.
> + */
> +void *
> +rte_sft_flow_reset_client_obj(uint32_t fid,
> +                           uint8_t client_id,
> +                           struct rte_sft_error *error);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_SFT_H_ */
> diff --git a/lib/librte_sft/rte_sft_driver.h b/lib/librte_sft/rte_sft_driver.h
> new file mode 100644
> index 0000000000..0c9e28fe17
> --- /dev/null
> +++ b/lib/librte_sft/rte_sft_driver.h
> @@ -0,0 +1,195 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +
> +#ifndef _RTE_SFT_DRIVER_H_
> +#define _RTE_SFT_DRIVER_H_
> +
> +/**
> + * @file
> + *
> + * RTE SFT Ethernet device PMD API
> + *
> + * APIs that are used by the SFT library to offload SFT operations
> + * to Ethernet device.
> + */
> +
> +#include "rte_sft.h"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * Opaque type returned after successfully creating an entry in SFT.
> + *
> + * This handle can be used to manage and query the related entry (e.g. to
> + * destroy it or update age).
> + */
> +struct rte_sft_entry;
> +
> +/**
> + * Create SFT entry in eth_dev SFT.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param tuple
> + *   L3/L4 5-tuple - src/dest IP and port and IP protocol.
> + * @param nat_tuple
> + *   L3/L4 5-tuple to replace in packet original 5-tuple in order to 
> implement
> + *   NAT offloading; if NULL NAT offloading won't be configured for the flow.
> + * @param aging
> + *   Flow aging timeout in seconds.
> + * @param ctx
> + *   Initial values in SFT flow context
> + *   (see RTE flow struct rte_flow_item_sft).
> + *   ctx->zone should be valid.
> + * @param fid
> + *   SFT flow ID for the entry to create on *device*.
> + *   If there is an entry for the *fid* in PMD it will be updated with the
> + *   values of *ctx*.
> + * @param[out] queue_index
> + *   if PMD can figure out the queue where the flow packets will
> + *   arrive in RX data path it will set the value of queue_index; otherwise
> + *   all bits will be turned on.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   A valid handle in case of success, NULL otherwise and rte_errno is set.
> + */
> +typedef struct rte_sft_entry *(*sft_entry_create_t) (struct rte_eth_dev *dev,
> +             const struct rte_sft_5tuple *tuple,
> +             const struct rte_sft_5tuple *nat_tuple,
> +             const uint32_t aging,
> +             const struct rte_flow_item_sft *ctx,
> +             const uint32_t fid,
> +             uint16_t *queue_index,
> +             struct rte_sft_error *error);
> +

I think that, for easier reading, the API should change to take a 6-tuple
(5-tuple + zone); the ctx should be removed and replaced with the state.

Then add new API to modify the ctx
typedef int (*sft_modify_state)(struct rte_eth_dev *dev, uint8 state);
The main issue with my suggestion is that it will force the PMD to store the
information needed to recreate the rule, data that is already saved by the SFT.

Also, I don't see why we need the queue index, since the RSS and queue will be
configured by the RTE flow in a different group.

> +/**
> + * Destroy SFT entry in eth_dev SFT.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param entry
> + *   Handle to the SFT entry to destroy.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +typedef int (*sft_entry_destroy_t)(struct rte_eth_dev *dev,
> +             struct rte_sft_entry *entry,
> +             struct rte_sft_error *error);
> +
> +/**
> + * Decodes SFT flow context if attached to mbuf by action ``SFT``.
> + * @see RTE flow RTE_FLOW_ACTION_TYPE_SFT.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param mbuf
> + *   mbuf of the packet to decode attached state from.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   A valid SFT flow context in case of success, NULL otherwise and 
> rte_errno
> + *   is set.
> + */
> +typedef struct rte_flow_item_sft *(*sft_entry_mbuf_decode_ctx_t)(
> +             struct rte_eth_dev *dev,
> +             const struct rte_mbuf *mbuf,
> +             struct rte_sft_error *error);
> +

What about returning an int error code and returning the rte_flow_item_sft
through an out parameter?
This would remove the need for the allocation and free.

> +/**
> + * Get aged-out SFT entries.
> + *
> + * Report entry as aged-out if timeout passed without any matching
> + * on the SFT entry.
> + *
> + * @param[in] dev
> + *   Pointer to Ethernet device structure.
> + * @param[in, out] fid_aged
> + *   The address of an array of aged-out SFT flow IDs.
> + * @param[in] nb_aged
> + *   The length of *fid_aged* array pointers.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. Initialized in case of
> + *   error only.
> + *
> + * @return
> + *   if nb_aged is 0, return the amount of all aged flows.
> + *   if nb_aged is not 0 , return the amount of aged flows reported
> + *   in the *fid_aged* array, otherwise negative errno value.
> + */
> +typedef int (*sft_entry_get_aged_entries_t)(struct rte_eth_dev *dev,
> +             uint32_t *fid_aged,
> +             int nb_aged,
> +             struct rte_sft_error *error);
> +
> +/**
> + * Simulate SFT entry match in terms of entry aging.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param fid
> + *   SFT flow ID paired with dev to retrieve related SFT entry.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +typedef int (*sft_entry_touch_t)(struct rte_eth_dev *dev,
> +             uint32_t fid,
> +             struct rte_sft_error *error);
> +
> +/**
> + * Set SFT entry aging to specific value.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param fid
> + *   SFT flow ID paired with dev to retrieve related SFT entry.
> + * @param aging
> + *   New entry aging value.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +typedef int (*sft_entry_set_aging_t)(struct rte_eth_dev *dev,
> +             uint32_t fid,
> +             uint32_t aging,
> +             struct rte_sft_error *error);
> +
> +/** SFT operations function pointer table */
> +struct rte_sft_ops {
> +     sft_entry_create_t entry_create;
> +     /**< Create SFT entry in eth_dev SFT. */
> +     sft_entry_destroy_t entry_destroy;
> +     /**< Destroy SFT entry in eth_dev SFT. */
> +     sft_entry_mbuf_decode_ctx_t mbuf_decode_ctx;
> +     /**< Decodes SFT flow context if attached to mbuf by action ``SFT``. */
> +     sft_entry_get_aged_entries_t get_aged_entries;
> +     /**< Get aged-out SFT entries. */
> +     sft_entry_touch_t entry_touch;
> +     /**< Simulate SFT entry match in terms of entry aging. */
> +     sft_entry_set_aging_t set_aging;
> +     /**< Set SFT entry aging to specific value. */
> +};
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_SFT_DRIVER_H_ */
> diff --git a/lib/librte_sft/rte_sft_version.map
> b/lib/librte_sft/rte_sft_version.map
> new file mode 100644
> index 0000000000..747e100ac5
> --- /dev/null
> +++ b/lib/librte_sft/rte_sft_version.map
> @@ -0,0 +1,21 @@
> +EXPERIMENTAL {
> +     global:
> +
> +     rte_sft_flow_get_status;
> +     rte_sft_flow_set_ctx;
> +     rte_sft_init;
> +     rte_sft_fini;
> +     rte_sft_process_mbuf;
> +     rte_sft_process_mbuf_with_zone;
> +     rte_sft_drain_mbuf;
> +     rte_sft_flow_activate;
> +     rte_sft_flow_create;
> +     rte_sft_flow_lock;
> +     rte_sft_flow_unlock;
> +     rte_sft_flow_destroy;
> +     rte_sft_flow_touch;
> +     rte_sft_flow_set_aging;
> +     rte_sft_flow_set_client_obj;
> +     rte_sft_flow_get_client_obj;
> +     rte_sft_flow_reset_client_obj;
> +};
> --
> 2.26.2

Best,
Ori

Reply via email to