This will break the ABI. Please check and fix.
> rte_node_process_t process; /**< Node process function. */
> rte_node_init_t init; /**< Node init function. */
> rte_node_fini_t fini; /**< Node fini function. */
> diff --git a/lib/graph/rte_graph_model_mcore_dispatch.h
> b/lib/graph/rte_graph_model_mcore_dispatch.h
> index f9ff3daa88ec..50a473564b56 100644
> --- a/lib/graph/rte_graph_model_mcore_dispatch.h
> +++ b/lib/graph/rte_graph_model_mcore_dispatch.h
> @@ -77,9 +77,13 @@ int
> rte_graph_model_mcore_dispatch_node_lcore_affinity_set(const char *name,
> unsigned int
> lcore_id);
>
> /**
> - * Perform graph walk on the circular buffer and invoke the process function
> + * Perform graph walk on the pending bitmap and invoke the process function
> * of the nodes and collect the stats.
> *
> + * Nodes are visited in scheduling order (lowest priority value first).
> + * Source nodes are seeded into the pending bitmap at the start of each walk.
> + * Nodes with different lcore affinity are dispatched to their target lcore.
> + *
> * @param graph
> * Graph pointer returned from rte_graph_lookup function.
> *
> @@ -88,20 +92,28 @@ int
> rte_graph_model_mcore_dispatch_node_lcore_affinity_set(const char *name,
> static inline void
> rte_graph_walk_mcore_dispatch(struct rte_graph *graph)
> {
> - const rte_graph_off_t *cir_start = graph->cir_start;
> - const rte_node_t mask = graph->cir_mask;
> - uint32_t head = graph->head;
> + const uint16_t nwords = graph->nb_sched_words;
> struct rte_node *node;
> + uint16_t word, bit;
>
> if (graph->dispatch.wq != NULL)
> __rte_graph_mcore_dispatch_sched_wq_process(graph);
>
> - while (likely(head != graph->tail)) {
> - node = (struct rte_node *)RTE_PTR_ADD(graph,
> cir_start[(int32_t)head++]);
> + /* Seed pending bitmap with source nodes bound to this lcore */
> + for (word = 0; word < nwords; word++)
> + graph->pending[word] |= graph->src_pending[word];
>
> - /* skip the src nodes which not bind with current worker */
> - if ((int32_t)head < 1 && node->dispatch.lcore_id !=
> graph->dispatch.lcore_id)
> - continue;
> + for (;;) {
> + /* find first word with any pending bit */
> + for (word = 0; word < nwords; word++)
> + if (graph->pending[word])
> + break;
> + if (word == nwords)
> + break; /* no more pending nodes */
> +
> + bit = rte_ctz64(graph->pending[word]);
> + graph->pending[word] &= ~(1ULL << bit);
> + node = __rte_graph_pending_node(graph, word, bit);
>
> /* Schedule the node until all task/objs are done */
> if (node->dispatch.lcore_id != RTE_MAX_LCORE &&
> @@ -111,11 +123,7 @@ rte_graph_walk_mcore_dispatch(struct rte_graph *graph)
> continue;
>
> __rte_node_process(graph, node);
> -
> - head = likely((int32_t)head > 0) ? head & mask : head;
> }
> -
> - graph->tail = 0;
> }
>
> #ifdef __cplusplus
> diff --git a/lib/graph/rte_graph_model_rtc.h b/lib/graph/rte_graph_model_rtc.h
> index 4b6236e301e3..38feb3e1ca88 100644
> --- a/lib/graph/rte_graph_model_rtc.h
> +++ b/lib/graph/rte_graph_model_rtc.h
> @@ -6,9 +6,12 @@
> #include "rte_graph_worker_common.h"
>
> /**
> - * Perform graph walk on the circular buffer and invoke the process function
> + * Perform graph walk on the pending bitmap and invoke the process function
> * of the nodes and collect the stats.
> *
> + * Nodes are visited in scheduling order (lowest priority value first).
> + * Source nodes are seeded into the pending bitmap at the start of each walk.
> + *
> * @param graph
> * Graph pointer returned from rte_graph_lookup function.
> *
> @@ -17,30 +20,52 @@
> static inline void
> rte_graph_walk_rtc(struct rte_graph *graph)
> {
> - const rte_graph_off_t *cir_start = graph->cir_start;
> - const rte_node_t mask = graph->cir_mask;
> - uint32_t head = graph->head;
> + const uint16_t nwords = graph->nb_sched_words;
> struct rte_node *node;
> + uint16_t word, bit;
>
> /*
> - * Walk on the source node(s) ((cir_start - head) -> cir_start) and then
> - * on the pending streams (cir_start -> (cir_start + mask) -> cir_start)
> - * in a circular buffer fashion.
> + * Nodes are assigned a bit position (sched_idx) sorted by (priority,
> + * node_id) at graph creation time. Source nodes are forced to INT16_MIN
> + * priority so they always come first.
> *
> - * +-----+ <= cir_start - head [number of source nodes]
> - * | |
> - * | ... | <= source nodes
> - * | |
> - * +-----+ <= cir_start [head = 0] [tail = 0]
> - * | |
> - * | ... | <= pending streams
> - * | |
> - * +-----+ <= cir_start + mask
> + * sched_table[] maps bit positions to node offsets:
> + *
> + * pending[] sched_table[]
> + * +----------+ +------------------+
> + * | word 0 | ---> | src_node_0 | bit 0 (prio=INT16_MIN)
> + * | 1100...1 | | src_node_1 | bit 1 (prio=INT16_MIN)
> + * | | | mpls_input | bit 2 (prio=-10)
> + * | | | ipv4_input | bit 3 (prio=0)
> + * | | | ... |
> + * +----------+ +------------------+
> + * | word 1 | ---> | ip4_rewrite | bit 64 (prio=10)
> + * | ... | | ... |
> + * +----------+ +------------------+
> + *
> + * Walk: find the first non-zero word, take its lowest set bit
> + * (rte_ctz64), clear that bit, then process the corresponding node
> + * (processing may set new bits).
> + *
> + * After each node is processed, restart scanning from word 0 since
> + * processing may set bits in any word, including earlier ones.
> */
> - while (likely(head != graph->tail)) {
> - node = (struct rte_node *)RTE_PTR_ADD(graph,
> cir_start[(int32_t)head++]);
> +
> + /* Seed pending bitmap with source nodes */
> + for (word = 0; word < nwords; word++)
> + graph->pending[word] |= graph->src_pending[word];
> +
> + for (;;) {
> + /* find first word with any pending bit */
> + for (word = 0; word < nwords; word++)
> + if (graph->pending[word])
> + break;
> + if (word == nwords)
> + break; /* no more pending nodes */
> +
> + bit = rte_ctz64(graph->pending[word]);
> + graph->pending[word] &= ~(1ULL << bit);
> + node = __rte_graph_pending_node(graph, word, bit);
> __rte_node_process(graph, node);
> - head = likely((int32_t)head > 0) ? head & mask : head;
> }
> - graph->tail = 0;
> }
> diff --git a/lib/graph/rte_graph_worker.h b/lib/graph/rte_graph_worker.h
> index b0f952a82cc9..e513d7a655d9 100644
> --- a/lib/graph/rte_graph_worker.h
> +++ b/lib/graph/rte_graph_worker.h
> @@ -14,7 +14,7 @@ extern "C" {
> #endif
>
> /**
> - * Perform graph walk on the circular buffer and invoke the process function
> + * Perform graph walk on the pending bitmap and invoke the process function
> * of the nodes and collect the stats.
> *
> * @param graph
> diff --git a/lib/graph/rte_graph_worker_common.h
> b/lib/graph/rte_graph_worker_common.h
> index 4ab53a533e4c..0e60486043d8 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -49,15 +49,14 @@ SLIST_HEAD(rte_graph_rq_head, rte_graph);
> */
> struct __rte_cache_aligned rte_graph {
> /* Fast path area. */
> - uint32_t tail; /**< Tail of circular buffer. */
> - uint32_t head; /**< Head of circular buffer. */
> - uint32_t cir_mask; /**< Circular buffer wrap around mask. */
> rte_node_t nb_nodes; /**< Number of nodes in the graph. */
> - rte_graph_off_t *cir_start; /**< Pointer to circular buffer. */
> rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
> + rte_graph_off_t *sched_table; /**< Node offset indexed by sched_idx. */
> + uint64_t *pending; /**< Bitmap of pending nodes. */
> + uint64_t *src_pending; /**< Bitmap of source nodes (constant). */
> + uint16_t nb_sched_words; /**< Number of uint64_t words in pending bitmaps. */
> uint8_t model; /**< graph model */
> - uint8_t reserved1; /**< Reserved for future use. */
> - uint16_t reserved2; /**< Reserved for future use. */
> + /* 26 bytes padding */
> union {
> /* Fast schedule area for mcore dispatch model */
> struct {
> @@ -98,6 +97,7 @@ struct __rte_cache_aligned rte_node {
> rte_node_t id; /**< Node identifier. */
> rte_node_t parent_id; /**< Parent Node identifier. */
> rte_edge_t nb_edges; /**< Number of edges from this node. */
> + uint16_t sched_idx; /**< Bit position in pending bitmap. */
> uint32_t realloc_count; /**< Number of times realloced. */
>
> char parent[RTE_NODE_NAMESIZE]; /**< Parent node name. */
> @@ -132,7 +132,7 @@ struct __rte_cache_aligned rte_node {
> }; /**< Node Context. */
> uint16_t size; /**< Total number of objects available.
> */
> uint16_t idx; /**< Number of objects used. */
> - rte_graph_off_t off; /**< Offset of node in the graph reel. */
> + rte_graph_off_t off; /**< Offset of node in the graph memory. */
> uint64_t total_cycles; /**< Cycles spent in this node. */
> uint64_t total_calls; /**< Calls done to this node. */
> uint64_t total_objs; /**< Objects processed by this node. */
> @@ -187,12 +187,12 @@ void __rte_node_stream_alloc_size(struct rte_graph
> *graph,
> /**
> * @internal
> *
> - * Enqueue a given node to the tail of the graph reel.
> + * Process a node's pending objects and collect stats.
> *
> * @param graph
> * Pointer Graph object.
> * @param node
> - * Pointer to node object to be enqueued.
> + * Pointer to node object to be processed.
> */
> static __rte_always_inline void
> __rte_node_process(struct rte_graph *graph, struct rte_node *node)
> @@ -220,21 +220,42 @@ __rte_node_process(struct rte_graph *graph, struct
> rte_node *node)
> /**
> * @internal
> *
> - * Enqueue a given node to the tail of the graph reel.
> + * Get a pointer to a node from the scheduling table.
> *
> * @param graph
> * Pointer Graph object.
> + * @param word
> + * Index of the uint64_t word within the pending bitmap.
> + * @param bit
> + * Bit position within that word.
> + *
> + * @return
> + * Pointer to the node.
> + */
> +static __rte_always_inline struct rte_node *
> +__rte_graph_pending_node(struct rte_graph *graph, uint16_t word, uint16_t bit)
> +{
> + const uint16_t index = (word * sizeof(*graph->pending) * CHAR_BIT) + bit;
> + const rte_graph_off_t node_offset = graph->sched_table[index];
> + return RTE_PTR_ADD(graph, node_offset);
> +}
> +
> +/**
> + * @internal
> + *
> + * Mark a node as pending in the graph scheduling bitmap.
> + *
> + * @param bitmap
> + * Either graph->pending or graph->src_pending.
> * @param node
> - * Pointer to node object to be enqueued.
> + * Pointer to node object to be marked pending.
> */
> static __rte_always_inline void
> -__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node
> *node)
> +__rte_node_pending_set(uint64_t *bitmap, struct rte_node *node)
> {
> - uint32_t tail;
> -
> - tail = graph->tail;
> - graph->cir_start[tail++] = node->off;
> - graph->tail = tail & graph->cir_mask;
> + const uint16_t word = node->sched_idx / (sizeof(*bitmap) * CHAR_BIT);
> + const uint16_t bit = node->sched_idx % (sizeof(*bitmap) * CHAR_BIT);
> + bitmap[word] |= 1ULL << bit;
> }
>
> /**
> @@ -242,8 +263,8 @@ __rte_node_enqueue_tail_update(struct rte_graph *graph,
> struct rte_node *node)
> *
> * Enqueue sequence prologue function.
> *
> - * Updates the node to tail of graph reel and resizes the number of objects
> - * available in the stream as needed.
> + * Marks the node as pending in the scheduling bitmap and resizes the number
> + * of objects available in the stream as needed.
> *
> * @param graph
> * Pointer to the graph object.
> @@ -259,9 +280,8 @@ __rte_node_enqueue_prologue(struct rte_graph *graph,
> struct rte_node *node,
> const uint16_t idx, const uint16_t space)
> {
>
> - /* Add to the pending stream list if the node is new */
> if (idx == 0)
> - __rte_node_enqueue_tail_update(graph, node);
> + __rte_node_pending_set(graph->pending, node);
>
> if (unlikely(node->size < (idx + space)))
> __rte_node_stream_alloc_size(graph, node, node->size + space);
> @@ -293,7 +313,7 @@ __rte_node_next_node_get(struct rte_node *node,
> rte_edge_t next)
>
> /**
> * Enqueue the objs to next node for further processing and set
> - * the next node to pending state in the circular buffer.
> + * the next node to pending state in the scheduling bitmap.
> *
> * @param graph
> * Graph pointer returned from rte_graph_lookup().
> @@ -321,7 +341,7 @@ rte_node_enqueue(struct rte_graph *graph, struct rte_node
> *node,
>
> /**
> * Enqueue only one obj to next node for further processing and
> - * set the next node to pending state in the circular buffer.
> + * set the next node to pending state in the scheduling bitmap.
> *
> * @param graph
> * Graph pointer returned from rte_graph_lookup().
> @@ -347,7 +367,7 @@ rte_node_enqueue_x1(struct rte_graph *graph, struct
> rte_node *node,
>
> /**
> * Enqueue only two objs to next node for further processing and
> - * set the next node to pending state in the circular buffer.
> + * set the next node to pending state in the scheduling bitmap.
> * Same as rte_node_enqueue_x1 but enqueue two objs.
> *
> * @param graph
> @@ -377,7 +397,7 @@ rte_node_enqueue_x2(struct rte_graph *graph, struct
> rte_node *node,
>
> /**
> * Enqueue only four objs to next node for further processing and
> - * set the next node to pending state in the circular buffer.
> + * set the next node to pending state in the scheduling bitmap.
> * Same as rte_node_enqueue_x1 but enqueue four objs.
> *
> * @param graph
> @@ -414,7 +434,7 @@ rte_node_enqueue_x4(struct rte_graph *graph, struct
> rte_node *node,
>
> /**
> * Enqueue objs to multiple next nodes for further processing and
> - * set the next nodes to pending state in the circular buffer.
> + * set the next nodes to pending state in the scheduling bitmap.
> * objs[i] will be enqueued to nexts[i].
> *
> * @param graph
> @@ -472,7 +492,7 @@ rte_node_next_stream_get(struct rte_graph *graph, struct
> rte_node *node,
> }
>
> /**
> - * Put the next stream to pending state in the circular buffer
> + * Put the next stream to pending state in the scheduling bitmap
> * for further processing. Should be invoked after
> rte_node_next_stream_get().
> *
> * @param graph
> @@ -496,8 +516,7 @@ rte_node_next_stream_put(struct rte_graph *graph, struct
> rte_node *node,
>
> node = __rte_node_next_node_get(node, next);
> if (node->idx == 0)
> - __rte_node_enqueue_tail_update(graph, node);
> -
> + __rte_node_pending_set(graph->pending, node);
> node->idx += idx;
> }
>
> @@ -530,7 +549,7 @@ rte_node_next_stream_move(struct rte_graph *graph, struct
> rte_node *src,
> src->objs = dobjs;
> src->size = dsz;
> dst->idx = src->idx;
> - __rte_node_enqueue_tail_update(graph, dst);
> + __rte_node_pending_set(graph->pending, dst);
> } else { /* Move the objects from src node to dst node */
> rte_node_enqueue(graph, src, next, src->objs, src->idx);
> }
> --
> 2.54.0
>