On Mon, Jun 22, 2026 at 12:11 AM Morten Brørup <[email protected]> 
wrote:
>
> Added graph node profiling stats, build time configurable by enabling
> RTE_GRAPH_PROFILE in rte_config.h.
>
> Signed-off-by: Morten Brørup <[email protected]>
> ---
> v5:
> * Added stats for a half burst and a full burst.
> v4:
> * Added documentation. (AI)
> * Added more comments. (AI)
> * Improved dump. (AI)
> * Debug shows both cycles/call and cycles/obj.
> v3:
> * Debug shows cycles/obj instead of cycles/call.
> * Fixed missing --in-reply-to.
> v2:
> * Fixed indentation.
> ---
>  config/rte_config.h                 |  1 +
>  doc/guides/prog_guide/graph_lib.rst |  1 +
>  lib/graph/graph_debug.c             | 57 ++++++++++++++++++++++++++++-
>  lib/graph/node.c                    |  2 +
>  lib/graph/rte_graph_worker_common.h | 33 +++++++++++++++--

Please update app/test/test_graph.c to validate this feature.

>  5 files changed, 90 insertions(+), 4 deletions(-)
>
> diff --git a/config/rte_config.h b/config/rte_config.h
> index 0447cdf2ad..1942c1b1ec 100644
> --- a/config/rte_config.h
> +++ b/config/rte_config.h
> @@ -106,6 +106,7 @@
>  /* rte_graph defines */
>  #define RTE_GRAPH_BURST_SIZE 256
>  #define RTE_LIBRTE_GRAPH_STATS 1
> +/* RTE_GRAPH_PROFILE is not set */
>
>  /****** driver defines ********/
>
> diff --git a/doc/guides/prog_guide/graph_lib.rst 
> b/doc/guides/prog_guide/graph_lib.rst
> index 8dd49c19d2..bc36042296 100644
> --- a/doc/guides/prog_guide/graph_lib.rst
> +++ b/doc/guides/prog_guide/graph_lib.rst
> @@ -49,6 +49,7 @@ Performance tuning parameters
>    RTE_GRAPH_BURST_SIZE config option.
>    The testing shows, on x86 and arm64 servers, The sweet spot is 256 burst
>    size. While on arm64 embedded SoCs, it is either 64 or 128.
> +- Enable the ``RTE_GRAPH_PROFILE`` config option for more profiling details.
>  - Disable node statistics (using ``RTE_LIBRTE_GRAPH_STATS`` config option)
>    if not needed.
>
> diff --git a/lib/graph/graph_debug.c b/lib/graph/graph_debug.c
> index e3b8cccdc1..7c97e23748 100644
> --- a/lib/graph/graph_debug.c
> +++ b/lib/graph/graph_debug.c
> @@ -92,7 +92,62 @@ rte_graph_obj_dump(FILE *f, struct rte_graph *g, bool all)
>                         fprintf(f, "       total_sched_fail=%" PRId64 "\n",
>                                 n->dispatch.total_sched_fail);
>                 }
> -               fprintf(f, "       total_calls=%" PRId64 "\n", 
> n->total_calls);
> +               fprintf(f, "       total_calls=%" PRIu64 "\n", 
> n->total_calls);
> +               if (rte_graph_has_stats_feature()) {
> +                       fprintf(f, "       total_cycles=%" PRIu64 ", avg 
> cycles/call=%.1f\n",
> +                                       n->total_cycles,
> +                                       n->total_calls == 0 ? (double)0 :
> +                                       (double)n->total_cycles / 
> (double)n->total_calls);
> +               }
> +#ifdef RTE_GRAPH_PROFILE


Please introduce rte_graph_has_profile_feature(), similar to
rte_graph_has_stats_feature(), to reduce #ifdef clutter as much as possible.

> +               uint64_t calls = n->usage_stats[0].calls;
> +               fprintf(f, "       objs[0]\n");
> +               fprintf(f, "         calls=%" PRIu64 ", cycles=%" PRIu64 ", 
> avg cycles/call=%.1f\n",
> +                               calls,

>
> diff --git a/lib/graph/rte_graph_worker_common.h 
> b/lib/graph/rte_graph_worker_common.h
> index 4ab53a533e..0d8039575d 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -144,12 +144,26 @@ struct __rte_cache_aligned rte_node {
>                         rte_node_process_t process; /**< Process function. */
>                         uint64_t process_u64;
>                 };
> +               /** Fast path area cache line 3. */
> +#ifdef RTE_GRAPH_PROFILE
> +               struct {
> +                       uint64_t calls;     /**< Calls processing resp. 0 or 
> 1 objects. */
> +                       uint64_t cycles;    /**< Cycles spent processing 
> resp. 0 or 1 objects. */
> +               } usage_stats[2];       /**< Usage when this node processed 0 
> or 1 objects. */
> +               uint64_t full_burst_calls;  /**< Calls processing a full 
> burst of objects. */
> +               uint64_t full_burst_cycles; /**< Cycles spent processing a 
> full burst of objects. */
> +               uint64_t half_burst_calls;  /**< Calls processing a half 
> burst of objects. */
> +               uint64_t half_burst_cycles; /**< Cycles spent processing a 
> half burst of objects. */
> +               /** Fast path area cache line 4. */
> +#endif

Is this an ABI breakage? With RTE_GRAPH_PROFILE defined, the new fields
change the offset of nodes[] in struct rte_node.

>                 alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; 
> /**< Next nodes. */
>         };
>  };
>

Reply via email to