ping

> -----Original Message-----
> From: "Sun Yuechi" <[email protected]>
> Sent: 2025-11-16 23:50:01 (Sunday)
> To: [email protected]
> Cc: "Sun Yuechi" <[email protected]>, Zijian <[email protected]>, "Stanisław Kardach" <[email protected]>, "Nithin Dabilpuram" <[email protected]>, "Pavan Nikhilesh" <[email protected]>
> Subject: [PATCH] node: lookup with RISC-V vector extension
> 
> Implement the ip4_lookup_node_process_vec function for the RISC-V
> architecture using the RISC-V Vector (RVV) extension instruction set.
> 
> Signed-off-by: Sun Yuechi <[email protected]>
> Signed-off-by: Zijian <[email protected]>
> ---
>  lib/eal/riscv/include/rte_vect.h |   2 +-
>  lib/node/ip4_lookup.c            |   5 +-
>  lib/node/ip4_lookup_rvv.h        | 167 +++++++++++++++++++++++++++++++
>  3 files changed, 172 insertions(+), 2 deletions(-)
>  create mode 100644 lib/node/ip4_lookup_rvv.h
> 
> diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
> index a4357e266a..4d16082449 100644
> --- a/lib/eal/riscv/include/rte_vect.h
> +++ b/lib/eal/riscv/include/rte_vect.h
> @@ -19,7 +19,7 @@
>  extern "C" {
>  #endif
>  
> -#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
> +#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_128
>  
>  typedef int32_t           xmm_t __attribute__((vector_size(16)));
> 
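A note on this one-liner: with the default left at
RTE_VECT_SIMD_DISABLED, rte_vect_get_max_simd_bitwidth() reports a
width below RTE_VECT_SIMD_128 unless the application overrides it, so
a runtime gate on 128-bit SIMD can never pass and the RVV path would
stay unreachable. A minimal sketch of the gating pattern this change
enables (the node-init hunk later in this patch does exactly this):

    if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
            node->process = ip4_lookup_node_process_vec; /* RVV path */
    /* otherwise the scalar ip4_lookup_node_process stays in place */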
> diff --git a/lib/node/ip4_lookup.c b/lib/node/ip4_lookup.c
> index 9673a0d78d..d3aed089f4 100644
> --- a/lib/node/ip4_lookup.c
> +++ b/lib/node/ip4_lookup.c
> @@ -44,6 +44,8 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
>  #include "ip4_lookup_neon.h"
>  #elif defined(RTE_ARCH_X86)
>  #include "ip4_lookup_sse.h"
> +#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
> +#include "ip4_lookup_rvv.h"
>  #endif
>  
>  static uint16_t
> @@ -211,7 +213,8 @@ ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
>    IP4_LOOKUP_NODE_LPM(node->ctx) = ip4_lookup_nm.lpm_tbl[graph->socket];
>    IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx) = dyn;
>  
> -#if defined(__ARM_NEON) || defined(RTE_ARCH_X86)
> +#if defined(__ARM_NEON) || defined(RTE_ARCH_X86) || \
> +  (defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V))
>    if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
>            node->process = ip4_lookup_node_process_vec;
>  #endif
> diff --git a/lib/node/ip4_lookup_rvv.h b/lib/node/ip4_lookup_rvv.h
> new file mode 100644
> index 0000000000..a74e4fa204
> --- /dev/null
> +++ b/lib/node/ip4_lookup_rvv.h
> @@ -0,0 +1,167 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
> + */
> +
> +#ifndef __INCLUDE_IP4_LOOKUP_RVV_H__
> +#define __INCLUDE_IP4_LOOKUP_RVV_H__
> +
> +#define RTE_LPM_LOOKUP_SUCCESS 0x01000000
> +#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
> +
> +static __rte_always_inline vuint32m8_t
> +bswap32_vec(vuint32m8_t v, size_t vl)
> +{
> +  vuint32m8_t low16 = __riscv_vor_vv_u32m8(
> +          __riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF, vl), 24, vl),
> +          __riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF00, vl), 8, vl),
> +          vl);
> +
> +  vuint32m8_t high16 = __riscv_vor_vv_u32m8(
> +          __riscv_vsrl_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF0000, vl), 8, vl),
> +          __riscv_vsrl_vx_u32m8(v, 24, vl),
> +          vl);
> +
> +  return __riscv_vor_vv_u32m8(low16, high16, vl);
> +}
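For readers less used to the RVV intrinsics, a hedged scalar reference
of what bswap32_vec computes per lane (illustrative only, not part of
the patch):

    /* Scalar equivalent of one bswap32_vec lane: reverse the four
     * bytes of a 32-bit value, i.e. network to host order on
     * little-endian RISC-V. */
    static inline uint32_t
    bswap32_scalar(uint32_t v)
    {
            return ((v & 0x000000FF) << 24) |
                   ((v & 0x0000FF00) << 8) |
                   ((v & 0x00FF0000) >> 8) |
                   (v >> 24);
    }

low16 above covers the two left-shifted terms and high16 the two
right-shifted ones; the final vor merges them.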
> +
> +static __rte_always_inline void
> +rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> +                  uint32_t *hop, size_t vl, uint32_t defv)
> +{
> +  /* Load IP addresses and byte-swap from network order */
> +  vuint32m8_t v_ip = bswap32_vec(__riscv_vle32_v_u32m8(ips, vl), vl);
> +
> +  vuint32m8_t v_tbl24_byte_offset = __riscv_vsll_vx_u32m8(
> +                  __riscv_vsrl_vx_u32m8(v_ip, 8, vl), 2, vl);
> +
> +  vuint32m8_t vtbl_entry = __riscv_vluxei32_v_u32m8(
> +          (const uint32_t *)lpm->tbl24, v_tbl24_byte_offset, vl);
> +
> +  vbool4_t mask = __riscv_vmseq_vx_u32m8_b4(
> +          __riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl),
> +          RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
> +
> +  vuint32m8_t vtbl8_index = __riscv_vsll_vx_u32m8(
> +          __riscv_vadd_vv_u32m8(
> +                  __riscv_vsll_vx_u32m8(
> +                          __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl), 8, vl),
> +                  __riscv_vand_vx_u32m8(v_ip, 0x000000FF, vl), vl),
> +          2, vl);
> +
> +  vtbl_entry = __riscv_vluxei32_v_u32m8_mu(
> +          mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);
> +
> +  vuint32m8_t vnext_hop = __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl);
> +  mask = __riscv_vmseq_vx_u32m8_b4(
> +          __riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl);
> +
> +  vnext_hop = __riscv_vmerge_vxm_u32m8(vnext_hop, defv, mask, vl);
> +
> +  __riscv_vse32_v_u32m8(hop, vnext_hop, vl);
> +}
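The two gathers walk DPDK's two-level LPM layout: tbl24 is indexed by
the top 24 address bits (the shift by 2 converts an element index into
the byte offset that vluxei32 expects), and entries carrying
RTE_LPM_VALID_EXT_ENTRY_BITMASK redirect into a 256-entry tbl8 group
selected by the last address byte. A hedged scalar sketch of the same
walk for a single address (illustrative only; the raw uint32_t table
views mirror the masked gathers above):

    static inline uint32_t
    lpm_lookup_scalar(const uint32_t *tbl24, const uint32_t *tbl8,
                      uint32_t ip, uint32_t defv)
    {
            uint32_t e = tbl24[ip >> 8];

            /* Valid, extended entry: low 24 bits select a tbl8
             * group, the last address byte picks the entry in it. */
            if ((e & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
                            RTE_LPM_VALID_EXT_ENTRY_BITMASK)
                    e = tbl8[((e & 0x00FFFFFF) << 8) + (ip & 0xFF)];

            /* Failed lookups fall back to the caller's default,
             * which this node sets to the drop edge. */
            return (e & RTE_LPM_LOOKUP_SUCCESS) ? (e & 0x00FFFFFF) : defv;
    }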
> +
> +/* Can be increased further for VLEN > 256 */
> +#define RVV_MAX_BURST 64U
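On the burst size: with SEW=32 and LMUL=8, a single vsetvl can return
at most VLMAX = VLEN / 32 * 8 lanes, so (assuming the common VLEN
values):

    VLEN = 128  ->  32 lanes
    VLEN = 256  ->  64 lanes   (matches RVV_MAX_BURST)
    VLEN = 512  -> 128 lanes, capped to 64 by RTE_MIN() below

The RTE_MIN(n_left_from, RVV_MAX_BURST) cap keeps vl within the
64-entry ips/res/next_hops stack buffers, which is why the comment
notes the constant can be raised for VLEN > 256.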
> +
> +static uint16_t
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
> +                  void **objs, uint16_t nb_objs)
> +{
> +  struct rte_mbuf **pkts;
> +  struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
> +  const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
> +  rte_edge_t next_index;
> +  void **to_next, **from;
> +  uint16_t last_spec = 0;
> +  uint16_t n_left_from;
> +  uint16_t held = 0;
> +  uint32_t drop_nh;
> +
> +  /* Temporary arrays for batch processing */
> +  uint32_t ips[RVV_MAX_BURST];
> +  uint32_t res[RVV_MAX_BURST];
> +  rte_edge_t next_hops[RVV_MAX_BURST];
> +
> +  /* Speculative next */
> +  next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
> +  /* Drop node */
> +  drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
> +
> +  pkts = (struct rte_mbuf **)objs;
> +  from = objs;
> +  n_left_from = nb_objs;
> +
> +  /* Get stream for the speculated next node */
> +  to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
> +
> +  while (n_left_from > 0) {
> +          rte_edge_t fix_spec = 0;
> +
> +          size_t vl = __riscv_vsetvl_e32m8(RTE_MIN(n_left_from, RVV_MAX_BURST));
> +
> +          /* Extract IP addresses and metadata from current batch */
> +          for (size_t i = 0; i < vl; i++) {
> +                  struct rte_ipv4_hdr *ipv4_hdr =
> +                          rte_pktmbuf_mtod_offset(pkts[i], struct rte_ipv4_hdr *,
> +                                          sizeof(struct rte_ether_hdr));
> +                  ips[i] = ipv4_hdr->dst_addr;
> +                  node_mbuf_priv1(pkts[i], dyn)->cksum = ipv4_hdr->hdr_checksum;
> +                  node_mbuf_priv1(pkts[i], dyn)->ttl = ipv4_hdr->time_to_live;
> +          }
> +
> +          /* Perform LPM lookup */
> +          rte_lpm_lookup_vec(lpm, ips, res, vl, drop_nh);
> +
> +          for (size_t i = 0; i < vl; i++) {
> +                  /* Update statistics */
> +                  if ((res[i] >> 16) == (drop_nh >> 16))
> +                          NODE_INCREMENT_XSTAT_ID(node, 0, 1, 1);
> +
> +                  /* Extract next hop and next node */
> +                  node_mbuf_priv1(pkts[i], dyn)->nh = res[i] & 0xFFFF;
> +                  next_hops[i] = res[i] >> 16;
> +
> +                  /* Check speculation */
> +                  fix_spec |= (next_index ^ next_hops[i]);
> +          }
> +
> +          if (unlikely(fix_spec)) {
> +                  /* Copy successfully speculated packets before this batch */
> +                  rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
> +                  from += last_spec;
> +                  to_next += last_spec;
> +                  held += last_spec;
> +                  last_spec = 0;
> +
> +                  /* Process each packet in current batch individually */
> +                  for (size_t i = 0; i < vl; i++) {
> +                          if (next_index == next_hops[i]) {
> +                                  *to_next++ = from[i];
> +                                  held++;
> +                          } else {
> +                                  rte_node_enqueue_x1(graph, node, next_hops[i], from[i]);
> +                          }
> +                  }
> +
> +                  from += vl;
> +          } else {
> +                  last_spec += vl;
> +          }
> +
> +          pkts += vl;
> +          n_left_from -= vl;
> +  }
> +
> +  /* Handle successfully speculated packets */
> +  if (likely(last_spec == nb_objs)) {
> +          rte_node_next_stream_move(graph, node, next_index);
> +          return nb_objs;
> +  }
> +
> +  held += last_spec;
> +  rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
> +  rte_node_next_stream_put(graph, node, next_index, held);
> +
> +  return nb_objs;
> +}
> +#endif
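For reference, the speculation bookkeeping mirrors the SSE/NEON
variants: each lookup result packs the destination as
(next_edge << 16) | next_hop, which is why drop_nh is the drop edge
shifted left by 16 with hop id 0. A hedged trace with hypothetical
values, assuming the speculated next_index is the rewrite edge:

    res[i]       = 0x00020005;                /* edge 2, next hop 5 */
    next_hops[i] = res[i] >> 16;              /* 2 */
    /* priv1 nh  = res[i] & 0xFFFF;              5 */
    fix_spec    |= next_index ^ next_hops[i]; /* nonzero on mismatch */

Once fix_spec is nonzero, the batch is split: packets whose edge
matches next_index stay in the prefetched to_next stream, the rest
are enqueued individually via rte_node_enqueue_x1().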
> -- 
> 2.51.2