Ping. Adding a few notes inline below while this awaits review.
> -----Original Message-----
> From: "Sun Yuechi" <[email protected]>
> Sent: 2025-11-16 23:50:01 (Sunday)
> To: [email protected]
> Cc: "Sun Yuechi" <[email protected]>, Zijian <[email protected]>, "Stanisław Kardach" <[email protected]>, "Nithin Dabilpuram" <[email protected]>, "Pavan Nikhilesh" <[email protected]>
> Subject: [PATCH] node: lookup with RISC-V vector extension
>
> Implement the ip4_lookup_node_process_vec() function for the RISC-V
> architecture using the RISC-V Vector (RVV) extension instruction set.
>
> Signed-off-by: Sun Yuechi <[email protected]>
> Signed-off-by: Zijian <[email protected]>
> ---
>  lib/eal/riscv/include/rte_vect.h |   2 +-
>  lib/node/ip4_lookup.c            |   5 +-
>  lib/node/ip4_lookup_rvv.h        | 167 +++++++++++++++++++++++++++++++
>  3 files changed, 172 insertions(+), 2 deletions(-)
>  create mode 100644 lib/node/ip4_lookup_rvv.h
>
> diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
> index a4357e266a..4d16082449 100644
> --- a/lib/eal/riscv/include/rte_vect.h
> +++ b/lib/eal/riscv/include/rte_vect.h
> @@ -19,7 +19,7 @@
>  extern "C" {
>  #endif
>  
> -#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
> +#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_128
>  
>  typedef int32_t xmm_t __attribute__((vector_size(16)));
>  
> diff --git a/lib/node/ip4_lookup.c b/lib/node/ip4_lookup.c
> index 9673a0d78d..d3aed089f4 100644
> --- a/lib/node/ip4_lookup.c
> +++ b/lib/node/ip4_lookup.c
> @@ -44,6 +44,8 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
>  #include "ip4_lookup_neon.h"
>  #elif defined(RTE_ARCH_X86)
>  #include "ip4_lookup_sse.h"
> +#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
> +#include "ip4_lookup_rvv.h"
>  #endif
>  
>  static uint16_t
> @@ -211,7 +213,8 @@ ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
>  	IP4_LOOKUP_NODE_LPM(node->ctx) = ip4_lookup_nm.lpm_tbl[graph->socket];
>  	IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx) = dyn;
>  
> -#if defined(__ARM_NEON) || defined(RTE_ARCH_X86)
> +#if defined(__ARM_NEON) || defined(RTE_ARCH_X86) || \
> +	(defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V))
>  	if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
>  		node->process = ip4_lookup_node_process_vec;
>  #endif
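
A note on the rte_vect.h hunk: raising the default from
RTE_VECT_SIMD_DISABLED to RTE_VECT_SIMD_128 makes vector code paths
eligible by default on RISC-V builds with the V extension, so the
runtime bitwidth check above is what actually gates the switch. For A/B
benchmarking, the scalar path can still be forced without a rebuild,
either with the EAL option --force-max-simd-bitwidth=64 or
programmatically. A minimal sketch (force_scalar_ip4_lookup is my own
hypothetical helper; it must run before graph creation, since
node->process is chosen in ip4_lookup_node_init()):

#include <rte_vect.h>

/* Cap the effective SIMD width at 64 bits so the ">= RTE_VECT_SIMD_128"
 * check in ip4_lookup_node_init() fails and the scalar
 * ip4_lookup_node_process() stays in place.  Returns -EPERM if the
 * width was already pinned via --force-max-simd-bitwidth.
 */
static int
force_scalar_ip4_lookup(void)
{
	return rte_vect_set_max_simd_bitwidth(RTE_VECT_SIMD_DISABLED);
}
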
> diff --git a/lib/node/ip4_lookup_rvv.h b/lib/node/ip4_lookup_rvv.h
> new file mode 100644
> index 0000000000..a74e4fa204
> --- /dev/null
> +++ b/lib/node/ip4_lookup_rvv.h
> @@ -0,0 +1,167 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
> + */
> +
> +#ifndef __INCLUDE_IP4_LOOKUP_RVV_H__
> +#define __INCLUDE_IP4_LOOKUP_RVV_H__
> +
> +#define RTE_LPM_LOOKUP_SUCCESS 0x01000000
> +#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
> +
> +static __rte_always_inline vuint32m8_t
> +bswap32_vec(vuint32m8_t v, size_t vl)
> +{
> +	vuint32m8_t low16 = __riscv_vor_vv_u32m8(
> +		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF, vl), 24, vl),
> +		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF00, vl), 8, vl),
> +		vl);
> +
> +	vuint32m8_t high16 = __riscv_vor_vv_u32m8(
> +		__riscv_vsrl_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF0000, vl), 8, vl),
> +		__riscv_vsrl_vx_u32m8(v, 24, vl),
> +		vl);
> +
> +	return __riscv_vor_vv_u32m8(low16, high16, vl);
> +}
> +
> +static __rte_always_inline void
> +rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> +		uint32_t *hop, size_t vl, uint32_t defv)
> +{
> +	/* Load IP addresses (network byte order) */
> +	vuint32m8_t v_ip = bswap32_vec(__riscv_vle32_v_u32m8(ips, vl), vl);
> +
> +	vuint32m8_t v_tbl24_byte_offset = __riscv_vsll_vx_u32m8(
> +		__riscv_vsrl_vx_u32m8(v_ip, 8, vl), 2, vl);
> +
> +	vuint32m8_t vtbl_entry = __riscv_vluxei32_v_u32m8(
> +		(const uint32_t *)lpm->tbl24, v_tbl24_byte_offset, vl);
> +
> +	vbool4_t mask = __riscv_vmseq_vx_u32m8_b4(
> +		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl),
> +		RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
> +
> +	vuint32m8_t vtbl8_index = __riscv_vsll_vx_u32m8(
> +		__riscv_vadd_vv_u32m8(
> +			__riscv_vsll_vx_u32m8(
> +				__riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl), 8, vl),
> +			__riscv_vand_vx_u32m8(v_ip, 0x000000FF, vl), vl),
> +		2, vl);
> +
> +	vtbl_entry = __riscv_vluxei32_v_u32m8_mu(
> +		mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);
> +
> +	vuint32m8_t vnext_hop = __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl);
> +	mask = __riscv_vmseq_vx_u32m8_b4(
> +		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl);
> +
> +	vnext_hop = __riscv_vmerge_vxm_u32m8(vnext_hop, defv, mask, vl);
> +
> +	__riscv_vse32_v_u32m8(hop, vnext_hop, vl);
> +}
> +
> +/* Can be increased further for VLEN > 256 */
> +#define RVV_MAX_BURST 64U
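
For readers less familiar with RVV intrinsics: per lane this is the
standard LPM tbl24/tbl8 walk, and the "<< 2" shifts only convert element
indices into the byte offsets that the vluxei32 indexed gathers expect.
A scalar sketch of what rte_lpm_lookup_vec() computes for one lane (my
own illustration, not part of the patch; it uses the macros defined in
the header above and the public tbl24/tbl8 members of struct rte_lpm):

#include <rte_byteorder.h>
#include <rte_lpm.h>

static inline uint32_t
lpm_lookup_one(const struct rte_lpm *lpm, uint32_t ip_be, uint32_t defv)
{
	uint32_t ip = rte_be_to_cpu_32(ip_be);	/* bswap32_vec, one lane */
	uint32_t e = ((const uint32_t *)lpm->tbl24)[ip >> 8];

	/* Valid + extended entry: the low 24 bits are a tbl8 group index;
	 * each group holds 256 entries, selected by the last address byte.
	 */
	if ((e & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
			RTE_LPM_VALID_EXT_ENTRY_BITMASK) {
		uint32_t idx = ((e & 0x00FFFFFF) << 8) + (ip & 0xFF);

		e = ((const uint32_t *)lpm->tbl8)[idx];
	}

	/* On a miss the vector code vmerges in the caller's default hop. */
	return (e & RTE_LPM_LOOKUP_SUCCESS) ? (e & 0x00FFFFFF) : defv;
}

The RVV_MAX_BURST comment also checks out: with e32m8,
VLMAX = VLEN / 32 * 8, so 64 covers implementations up to VLEN = 256.
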
> +
> +static uint16_t
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
> +		void **objs, uint16_t nb_objs)
> +{
> +	struct rte_mbuf **pkts;
> +	struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
> +	const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
> +	rte_edge_t next_index;
> +	void **to_next, **from;
> +	uint16_t last_spec = 0;
> +	uint16_t n_left_from;
> +	uint16_t held = 0;
> +	uint32_t drop_nh;
> +
> +	/* Temporary arrays for batch processing */
> +	uint32_t ips[RVV_MAX_BURST];
> +	uint32_t res[RVV_MAX_BURST];
> +	rte_edge_t next_hops[RVV_MAX_BURST];
> +
> +	/* Speculative next */
> +	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
> +	/* Drop node */
> +	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
> +
> +	pkts = (struct rte_mbuf **)objs;
> +	from = objs;
> +	n_left_from = nb_objs;
> +
> +	/* Get stream for the speculated next node */
> +	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
> +
> +	while (n_left_from > 0) {
> +		rte_edge_t fix_spec = 0;
> +
> +		size_t vl = __riscv_vsetvl_e32m8(RTE_MIN(n_left_from, RVV_MAX_BURST));
> +
> +		/* Extract IP addresses and metadata from current batch */
> +		for (size_t i = 0; i < vl; i++) {
> +			struct rte_ipv4_hdr *ipv4_hdr =
> +				rte_pktmbuf_mtod_offset(pkts[i], struct rte_ipv4_hdr *,
> +					sizeof(struct rte_ether_hdr));
> +			ips[i] = ipv4_hdr->dst_addr;
> +			node_mbuf_priv1(pkts[i], dyn)->cksum = ipv4_hdr->hdr_checksum;
> +			node_mbuf_priv1(pkts[i], dyn)->ttl = ipv4_hdr->time_to_live;
> +		}
> +
> +		/* Perform LPM lookup */
> +		rte_lpm_lookup_vec(lpm, ips, res, vl, drop_nh);
> +
> +		for (size_t i = 0; i < vl; i++) {
> +			/* Update statistics */
> +			if ((res[i] >> 16) == (drop_nh >> 16))
> +				NODE_INCREMENT_XSTAT_ID(node, 0, 1, 1);
> +
> +			/* Extract next hop and next node */
> +			node_mbuf_priv1(pkts[i], dyn)->nh = res[i] & 0xFFFF;
> +			next_hops[i] = res[i] >> 16;
> +
> +			/* Check speculation */
> +			fix_spec |= (next_index ^ next_hops[i]);
> +		}
> +
> +		if (unlikely(fix_spec)) {
> +			/* Copy successfully speculated packets before this batch */
> +			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
> +			from += last_spec;
> +			to_next += last_spec;
> +			held += last_spec;
> +			last_spec = 0;
> +
> +			/* Process each packet in current batch individually */
> +			for (size_t i = 0; i < vl; i++) {
> +				if (next_index == next_hops[i]) {
> +					*to_next++ = from[i];
> +					held++;
> +				} else {
> +					rte_node_enqueue_x1(graph, node, next_hops[i], from[i]);
> +				}
> +			}
> +
> +			from += vl;
> +		} else {
> +			last_spec += vl;
> +		}
> +
> +		pkts += vl;
> +		n_left_from -= vl;
> +	}
> +
> +	/* Handle successfully speculated packets */
> +	if (likely(last_spec == nb_objs)) {
> +		rte_node_next_stream_move(graph, node, next_index);
> +		return nb_objs;
> +	}
> +
> +	held += last_spec;
> +	rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
> +	rte_node_next_stream_put(graph, node, next_index, held);
> +
> +	return nb_objs;
> +}
> +#endif
> --
> 2.51.2
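
One more suggestion. The helper takes network-byte-order addresses and
encodes a miss as the caller-supplied default (here drop_nh), while the
scalar rte_lpm_lookup() takes host order and returns -ENOENT, so a
cross-check between the two is cheap to write. Below is the rough
self-test I used while reading the patch; check_rvv_lookup is my own
hypothetical name, not proposed for the tree, and it assumes
rte_lpm_lookup_vec() is in scope (the header depends on node-private
definitions, so in practice this would live inside lib/node):

#include <riscv_vector.h>
#include <rte_byteorder.h>
#include <rte_ip.h>
#include <rte_lpm.h>
#include <rte_memory.h>
/* assumes rte_lpm_lookup_vec() from ip4_lookup_rvv.h is in scope */

static int
check_rvv_lookup(void)
{
	struct rte_lpm_config cfg = {
		.max_rules = 16,
		.number_tbl8s = 256,
	};
	struct rte_lpm *lpm = rte_lpm_create("rvv_selftest", SOCKET_ID_ANY, &cfg);
	const uint32_t test[4] = {
		RTE_IPV4(10, 2, 3, 4),		/* /8 route   -> 7    */
		RTE_IPV4(10, 1, 1, 200),	/* /24 route  -> 9    */
		RTE_IPV4(192, 0, 2, 1),		/* no route   -> miss */
		RTE_IPV4(10, 1, 2, 1),		/* /8 route   -> 7    */
	};
	const uint32_t miss = 0xdead;
	uint32_t ips_be[4], got[4], want, nh;
	int i, rc = 0;

	if (lpm == NULL)
		return -1;
	/* The /24 underneath the /8 exercises the masked tbl8 gather. */
	rte_lpm_add(lpm, RTE_IPV4(10, 0, 0, 0), 8, 7);
	rte_lpm_add(lpm, RTE_IPV4(10, 1, 1, 0), 24, 9);

	for (i = 0; i < 4; i++)
		ips_be[i] = rte_cpu_to_be_32(test[i]);

	rte_lpm_lookup_vec(lpm, ips_be, got, __riscv_vsetvl_e32m8(4), miss);

	for (i = 0; i < 4; i++) {
		want = rte_lpm_lookup(lpm, test[i], &nh) == 0 ? nh : miss;
		if (got[i] != want)
			rc = -1;
	}
	rte_lpm_free(lpm);
	return rc;
}

With that sanity check passing on my side, the lookup logic itself
looks correct; the speculative-enqueue bookkeeping mirrors the existing
scalar node, so only the batch extraction differs.
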

