This will break the ABI. Please check and fix.

>       rte_node_process_t process; /**< Node process function. */
>       rte_node_init_t init;       /**< Node init function. */
>       rte_node_fini_t fini;       /**< Node fini function. */
> diff --git a/lib/graph/rte_graph_model_mcore_dispatch.h 
> b/lib/graph/rte_graph_model_mcore_dispatch.h
> index f9ff3daa88ec..50a473564b56 100644
> --- a/lib/graph/rte_graph_model_mcore_dispatch.h
> +++ b/lib/graph/rte_graph_model_mcore_dispatch.h
> @@ -77,9 +77,13 @@ int 
> rte_graph_model_mcore_dispatch_node_lcore_affinity_set(const char *name,
>                                                          unsigned int 
> lcore_id);
>
>  /**
> - * Perform graph walk on the circular buffer and invoke the process function
> + * Perform graph walk on the pending bitmap and invoke the process function
>   * of the nodes and collect the stats.
>   *
> + * Nodes are visited in scheduling order (lowest priority value first).
> + * Source nodes are seeded into the pending bitmap at the start of each walk.
> + * Nodes with different lcore affinity are dispatched to their target lcore.
> + *
>   * @param graph
>   *   Graph pointer returned from rte_graph_lookup function.
>   *
> @@ -88,20 +92,28 @@ int 
> rte_graph_model_mcore_dispatch_node_lcore_affinity_set(const char *name,
>  static inline void
>  rte_graph_walk_mcore_dispatch(struct rte_graph *graph)
>  {
> -     const rte_graph_off_t *cir_start = graph->cir_start;
> -     const rte_node_t mask = graph->cir_mask;
> -     uint32_t head = graph->head;
> +     const uint16_t nwords = graph->nb_sched_words;
>       struct rte_node *node;
> +     uint16_t word, bit;
>
>       if (graph->dispatch.wq != NULL)
>               __rte_graph_mcore_dispatch_sched_wq_process(graph);
>
> -     while (likely(head != graph->tail)) {
> -             node = (struct rte_node *)RTE_PTR_ADD(graph, 
> cir_start[(int32_t)head++]);
> +     /* Seed pending bitmap with source nodes bound to this lcore */
> +     for (word = 0; word < nwords; word++)
> +             graph->pending[word] |= graph->src_pending[word];
>
> -             /* skip the src nodes which not bind with current worker */
> -             if ((int32_t)head < 1 && node->dispatch.lcore_id != 
> graph->dispatch.lcore_id)
> -                     continue;
> +     for (;;) {
> +             /* find first word with any pending bit */
> +             for (word = 0; word < nwords; word++)
> +                     if (graph->pending[word])
> +                             break;
> +             if (word == nwords)
> +                     break; /* no more pending nodes */
> +
> +             bit = rte_ctz64(graph->pending[word]);
> +             graph->pending[word] &= ~(1ULL << bit);
> +             node = __rte_graph_pending_node(graph, word, bit);
>
>               /* Schedule the node until all task/objs are done */
>               if (node->dispatch.lcore_id != RTE_MAX_LCORE &&
> @@ -111,11 +123,7 @@ rte_graph_walk_mcore_dispatch(struct rte_graph *graph)
>                       continue;
>
>               __rte_node_process(graph, node);
> -
> -             head = likely((int32_t)head > 0) ? head & mask : head;
>       }
> -
> -     graph->tail = 0;
>  }
>
>  #ifdef __cplusplus
> diff --git a/lib/graph/rte_graph_model_rtc.h b/lib/graph/rte_graph_model_rtc.h
> index 4b6236e301e3..38feb3e1ca88 100644
> --- a/lib/graph/rte_graph_model_rtc.h
> +++ b/lib/graph/rte_graph_model_rtc.h
> @@ -6,9 +6,12 @@
>  #include "rte_graph_worker_common.h"
>
>  /**
> - * Perform graph walk on the circular buffer and invoke the process function
> + * Perform graph walk on the pending bitmap and invoke the process function
>   * of the nodes and collect the stats.
>   *
> + * Nodes are visited in scheduling order (lowest priority value first).
> + * Source nodes are seeded into the pending bitmap at the start of each walk.
> + *
>   * @param graph
>   *   Graph pointer returned from rte_graph_lookup function.
>   *
> @@ -17,30 +20,52 @@
>  static inline void
>  rte_graph_walk_rtc(struct rte_graph *graph)
>  {
> -     const rte_graph_off_t *cir_start = graph->cir_start;
> -     const rte_node_t mask = graph->cir_mask;
> -     uint32_t head = graph->head;
> +     const uint16_t nwords = graph->nb_sched_words;
>       struct rte_node *node;
> +     uint16_t word, bit;
>
>       /*
> -      * Walk on the source node(s) ((cir_start - head) -> cir_start) and then
> -      * on the pending streams (cir_start -> (cir_start + mask) -> cir_start)
> -      * in a circular buffer fashion.
> +      * Nodes are assigned a bit position (sched_idx) sorted by (priority,
> +      * node_id) at graph creation time. Source nodes are forced to INT16_MIN
> +      * priority so they always come first.
>        *
> -      *      +-----+ <= cir_start - head [number of source nodes]
> -      *      |     |
> -      *      | ... | <= source nodes
> -      *      |     |
> -      *      +-----+ <= cir_start [head = 0] [tail = 0]
> -      *      |     |
> -      *      | ... | <= pending streams
> -      *      |     |
> -      *      +-----+ <= cir_start + mask
> +      * sched_table[] maps bit positions to node offsets:
> +      *
> +      *   pending[]         sched_table[]
> +      *   +----------+      +------------------+
> +      *   | word 0   | ---> | src_node_0       | bit 0 (prio=INT16_MIN)
> +      *   | 1100...1 |      | src_node_1       | bit 1 (prio=INT16_MIN)
> +      *   |          |      | mpls_input       | bit 2 (prio=-10)
> +      *   |          |      | ipv4_input       | bit 3 (prio=0)
> +      *   |          |      | ...              |
> +      *   +----------+      +------------------+
> +      *   | word 1   | ---> | ip4_rewrite      | bit 64 (prio=10)
> +      *   | ...      |      | ...              |
> +      *   +----------+      +------------------+
> +      *
> +      * Walk: find the first non-zero word, take its lowest set bit
> +      * (rte_ctz64), clear that bit, then process the corresponding node.
> +      * Repeat until no word has a pending bit.
> +      *
> +      * After each node is processed, restart scanning from word 0 since
> +      * processing may set bits in any word, including earlier ones.
>        */
> -     while (likely(head != graph->tail)) {
> -             node = (struct rte_node *)RTE_PTR_ADD(graph, 
> cir_start[(int32_t)head++]);
> +
> +     /* Seed pending bitmap with source nodes */
> +     for (word = 0; word < nwords; word++)
> +             graph->pending[word] |= graph->src_pending[word];
> +
> +     for (;;) {
> +             /* find first word with any pending bit */
> +             for (word = 0; word < nwords; word++)
> +                     if (graph->pending[word])
> +                             break;
> +             if (word == nwords)
> +                     break; /* no more pending nodes */
> +
> +             bit = rte_ctz64(graph->pending[word]);
> +             graph->pending[word] &= ~(1ULL << bit);
> +             node = __rte_graph_pending_node(graph, word, bit);
>               __rte_node_process(graph, node);
> -             head = likely((int32_t)head > 0) ? head & mask : head;
>       }
> -     graph->tail = 0;
>  }
> diff --git a/lib/graph/rte_graph_worker.h b/lib/graph/rte_graph_worker.h
> index b0f952a82cc9..e513d7a655d9 100644
> --- a/lib/graph/rte_graph_worker.h
> +++ b/lib/graph/rte_graph_worker.h
> @@ -14,7 +14,7 @@ extern "C" {
>  #endif
>
>  /**
> - * Perform graph walk on the circular buffer and invoke the process function
> + * Perform graph walk on the pending bitmap and invoke the process function
>   * of the nodes and collect the stats.
>   *
>   * @param graph
> diff --git a/lib/graph/rte_graph_worker_common.h 
> b/lib/graph/rte_graph_worker_common.h
> index 4ab53a533e4c..0e60486043d8 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -49,15 +49,14 @@ SLIST_HEAD(rte_graph_rq_head, rte_graph);
>   */
>  struct __rte_cache_aligned rte_graph {
>       /* Fast path area. */
> -     uint32_t tail;               /**< Tail of circular buffer. */
> -     uint32_t head;               /**< Head of circular buffer. */
> -     uint32_t cir_mask;           /**< Circular buffer wrap around mask. */
>       rte_node_t nb_nodes;         /**< Number of nodes in the graph. */
> -     rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
>       rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
> +     rte_graph_off_t *sched_table; /**< Node offset indexed by sched_idx. */
> +     uint64_t *pending;           /**< Bitmap of pending nodes. */
> +     uint64_t *src_pending;       /**< Bitmap of source nodes (constant). */
> +     uint16_t nb_sched_words;     /**< Number of uint64_t words in pending 
> bitmaps. */
>       uint8_t model;               /**< graph model */
> -     uint8_t reserved1;           /**< Reserved for future use. */
> -     uint16_t reserved2;          /**< Reserved for future use. */
> +     /* 26 bytes padding */
>       union {
>               /* Fast schedule area for mcore dispatch model */
>               struct {
> @@ -98,6 +97,7 @@ struct __rte_cache_aligned rte_node {
>       rte_node_t id;          /**< Node identifier. */
>       rte_node_t parent_id;   /**< Parent Node identifier. */
>       rte_edge_t nb_edges;    /**< Number of edges from this node. */
> +     uint16_t sched_idx;     /**< Bit position in pending bitmap. */
>       uint32_t realloc_count; /**< Number of times realloced. */
>
>       char parent[RTE_NODE_NAMESIZE]; /**< Parent node name. */
> @@ -132,7 +132,7 @@ struct __rte_cache_aligned rte_node {
>               }; /**< Node Context. */
>               uint16_t size;          /**< Total number of objects available. 
> */
>               uint16_t idx;           /**< Number of objects used. */
> -             rte_graph_off_t off;    /**< Offset of node in the graph reel. 
> */
> +             rte_graph_off_t off;    /**< Offset of node in the graph 
> memory. */
>               uint64_t total_cycles;  /**< Cycles spent in this node. */
>               uint64_t total_calls;   /**< Calls done to this node. */
>               uint64_t total_objs;    /**< Objects processed by this node. */
> @@ -187,12 +187,12 @@ void __rte_node_stream_alloc_size(struct rte_graph 
> *graph,
>  /**
>   * @internal
>   *
> - * Enqueue a given node to the tail of the graph reel.
> + * Process a node's pending objects and collect stats.
>   *
>   * @param graph
>   *   Pointer Graph object.
>   * @param node
> - *   Pointer to node object to be enqueued.
> + *   Pointer to node object to be processed.
>   */
>  static __rte_always_inline void
>  __rte_node_process(struct rte_graph *graph, struct rte_node *node)
> @@ -220,21 +220,42 @@ __rte_node_process(struct rte_graph *graph, struct 
> rte_node *node)
>  /**
>   * @internal
>   *
> - * Enqueue a given node to the tail of the graph reel.
> + * Get a pointer to a node from the scheduling table.
>   *
>   * @param graph
>   *   Pointer Graph object.
> + * @param word
> + *   Index of the 64-bit word in the pending bitmap.
> + * @param bit
> + *   Bit position within that word.
> + *
> + * @return
> + *   Pointer to the node.
> + */
> +static __rte_always_inline struct rte_node *
> +__rte_graph_pending_node(struct rte_graph *graph, uint16_t word, uint16_t 
> bit)
> +{
> +     const uint16_t index = (word * sizeof(*graph->pending) * CHAR_BIT) + 
> bit;
> +     const rte_graph_off_t node_offset = graph->sched_table[index];
> +     return RTE_PTR_ADD(graph, node_offset);
> +}
> +
> +/**
> + * @internal
> + *
> + * Mark a node as pending in the graph scheduling bitmap.
> + *
> + * @param bitmap
> + *   Either graph->pending or graph->src_pending.
>   * @param node
> - *   Pointer to node object to be enqueued.
> + *   Pointer to node object to be marked pending.
>   */
>  static __rte_always_inline void
> -__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node 
> *node)
> +__rte_node_pending_set(uint64_t *bitmap, struct rte_node *node)
>  {
> -     uint32_t tail;
> -
> -     tail = graph->tail;
> -     graph->cir_start[tail++] = node->off;
> -     graph->tail = tail & graph->cir_mask;
> +     const uint16_t word = node->sched_idx / (sizeof(*bitmap) * CHAR_BIT);
> +     const uint16_t bit = node->sched_idx % (sizeof(*bitmap) * CHAR_BIT);
> +     bitmap[word] |= 1ULL << bit;
>  }
>
>  /**
> @@ -242,8 +263,8 @@ __rte_node_enqueue_tail_update(struct rte_graph *graph, 
> struct rte_node *node)
>   *
>   * Enqueue sequence prologue function.
>   *
> - * Updates the node to tail of graph reel and resizes the number of objects
> - * available in the stream as needed.
> + * Marks the node as pending in the scheduling bitmap and resizes the number
> + * of objects available in the stream as needed.
>   *
>   * @param graph
>   *   Pointer to the graph object.
> @@ -259,9 +280,8 @@ __rte_node_enqueue_prologue(struct rte_graph *graph, 
> struct rte_node *node,
>                           const uint16_t idx, const uint16_t space)
>  {
>
> -     /* Add to the pending stream list if the node is new */
>       if (idx == 0)
> -             __rte_node_enqueue_tail_update(graph, node);
> +             __rte_node_pending_set(graph->pending, node);
>
>       if (unlikely(node->size < (idx + space)))
>               __rte_node_stream_alloc_size(graph, node, node->size + space);
> @@ -293,7 +313,7 @@ __rte_node_next_node_get(struct rte_node *node, 
> rte_edge_t next)
>
>  /**
>   * Enqueue the objs to next node for further processing and set
> - * the next node to pending state in the circular buffer.
> + * the next node to pending state in the scheduling bitmap.
>   *
>   * @param graph
>   *   Graph pointer returned from rte_graph_lookup().
> @@ -321,7 +341,7 @@ rte_node_enqueue(struct rte_graph *graph, struct rte_node 
> *node,
>
>  /**
>   * Enqueue only one obj to next node for further processing and
> - * set the next node to pending state in the circular buffer.
> + * set the next node to pending state in the scheduling bitmap.
>   *
>   * @param graph
>   *   Graph pointer returned from rte_graph_lookup().
> @@ -347,7 +367,7 @@ rte_node_enqueue_x1(struct rte_graph *graph, struct 
> rte_node *node,
>
>  /**
>   * Enqueue only two objs to next node for further processing and
> - * set the next node to pending state in the circular buffer.
> + * set the next node to pending state in the scheduling bitmap.
>   * Same as rte_node_enqueue_x1 but enqueue two objs.
>   *
>   * @param graph
> @@ -377,7 +397,7 @@ rte_node_enqueue_x2(struct rte_graph *graph, struct 
> rte_node *node,
>
>  /**
>   * Enqueue only four objs to next node for further processing and
> - * set the next node to pending state in the circular buffer.
> + * set the next node to pending state in the scheduling bitmap.
>   * Same as rte_node_enqueue_x1 but enqueue four objs.
>   *
>   * @param graph
> @@ -414,7 +434,7 @@ rte_node_enqueue_x4(struct rte_graph *graph, struct 
> rte_node *node,
>
>  /**
>   * Enqueue objs to multiple next nodes for further processing and
> - * set the next nodes to pending state in the circular buffer.
> + * set the next nodes to pending state in the scheduling bitmap.
>   * objs[i] will be enqueued to nexts[i].
>   *
>   * @param graph
> @@ -472,7 +492,7 @@ rte_node_next_stream_get(struct rte_graph *graph, struct 
> rte_node *node,
>  }
>
>  /**
> - * Put the next stream to pending state in the circular buffer
> + * Put the next stream to pending state in the scheduling bitmap
>   * for further processing. Should be invoked after 
> rte_node_next_stream_get().
>   *
>   * @param graph
> @@ -496,8 +516,7 @@ rte_node_next_stream_put(struct rte_graph *graph, struct 
> rte_node *node,
>
>       node = __rte_node_next_node_get(node, next);
>       if (node->idx == 0)
> -             __rte_node_enqueue_tail_update(graph, node);
> -
> +             __rte_node_pending_set(graph->pending, node);
>       node->idx += idx;
>  }
>
> @@ -530,7 +549,7 @@ rte_node_next_stream_move(struct rte_graph *graph, struct 
> rte_node *src,
>               src->objs = dobjs;
>               src->size = dsz;
>               dst->idx = src->idx;
> -             __rte_node_enqueue_tail_update(graph, dst);
> +             __rte_node_pending_set(graph->pending, dst);
>       } else { /* Move the objects from src node to dst node */
>               rte_node_enqueue(graph, src, next, src->objs, src->idx);
>       }
> --
> 2.54.0
>

Reply via email to