Thanks, they have been updated in v5.
> -----原始邮件----- > 发件人: "Medvedkin, Vladimir" <vladimir.medved...@intel.com> > 发送时间: 2025-06-11 19:52:57 (星期三) > 收件人: u...@foxmail.com, dev@dpdk.org > 抄送: "Sun Yuechi" <sunyue...@iscas.ac.cn>, "Thomas Monjalon" <tho...@monjalon.net>, "Bruce Richardson" <bruce.richard...@intel.com>, "Stanislaw Kardach" <stanislaw.kard...@gmail.com> > 主题: Re: [PATCH v4 2/3] lib/lpm: R-V V rte_lpm_lookupx4 > > Hi Sun, > > You did not address my previous comments regarding commit message. You > can put everything you've wrote in this commit as a note and add > meaningful description about what commit generally does, like (please > correct if needed): > > "Implement LPM lookupx4 routine for RISC-V architecture using RISC-V > Vector Extension instruction set" > > Everything else (performance tests, implementation thoughts and > considerations, etc.) should be in the patch notes. For more information > on what "patch notes" are, you may want refer to Git documentation [1]. > > [1] https://git-scm.com/docs/git-notes > > On 05/06/2025 11:58, u...@foxmail.com wrote: > > > From: Sun Yuechi <sunyue...@iscas.ac.cn> > > > > The initialization of vtbl_entry is not fully vectorized here because > > doing so would require __riscv_vluxei32_v_u32m1, which is slower > > than the scalar approach in this small-scale scenario. 
> > > > - Test: app/test/lpm_perf_autotest > > - Platform: Banana Pi(BPI-F3) > > - SoC: Spacemit X60 (8 cores with Vector extension) > > - CPU Frequency: up to 1.6 GHz > > - Cache: 256 KiB L1d ×8, 256 KiB L1i ×8, 1 MiB L2 ×2 > > - Memory: 16 GiB > > - Kernel: Linux 6.6.36 > > - Compiler: GCC 14.2.0 (with RVV intrinsic support) > > > > Test results(LPM LookupX4): > > scalar: 5.7 cycles > > rvv: 4.6 cycles > > > > Signed-off-by: Sun Yuechi <sunyue...@iscas.ac.cn> > > --- > > MAINTAINERS | 2 ++ > > lib/lpm/meson.build | 1 + > > lib/lpm/rte_lpm.h | 2 ++ > > lib/lpm/rte_lpm_rvv.h | 62 +++++++++++++++++++++++++++++++++++++++++++ > > 4 files changed, 67 insertions(+) > > create mode 100644 lib/lpm/rte_lpm_rvv.h > > > > diff --git a/MAINTAINERS b/MAINTAINERS > > index 3e16789250..0f207ac129 100644 > > --- a/MAINTAINERS > > +++ b/MAINTAINERS > > @@ -340,6 +340,8 @@ M: Stanislaw Kardach <stanislaw.kard...@gmail.com> > > F: config/riscv/ > > F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst > > F: lib/eal/riscv/ > > +M: sunyuechi <sunyue...@iscas.ac.cn> > > +F: lib/**/*rvv* > > > > Intel x86 > > M: Bruce Richardson <bruce.richard...@intel.com> > > diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build > > index fae4f79fb9..09133061e5 100644 > > --- a/lib/lpm/meson.build > > +++ b/lib/lpm/meson.build > > @@ -17,6 +17,7 @@ indirect_headers += files( > > 'rte_lpm_scalar.h', > > 'rte_lpm_sse.h', > > 'rte_lpm_sve.h', > > + 'rte_lpm_rvv.h', > > ) > > deps += ['hash'] > > deps += ['rcu'] > > diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h > > index 7df64f06b1..b06517206f 100644 > > --- a/lib/lpm/rte_lpm.h > > +++ b/lib/lpm/rte_lpm.h > > @@ -408,6 +408,8 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > > #include "rte_lpm_altivec.h" > > #elif defined(RTE_ARCH_X86) > > #include "rte_lpm_sse.h" > > +#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V) > > +#include "rte_lpm_rvv.h" > > #else > > #include "rte_lpm_scalar.h" > > #endif > > diff 
--git a/lib/lpm/rte_lpm_rvv.h b/lib/lpm/rte_lpm_rvv.h > > new file mode 100644 > > index 0000000000..5f48fb2b32 > > --- /dev/null > > +++ b/lib/lpm/rte_lpm_rvv.h > > @@ -0,0 +1,62 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). > > + */ > > + > > +#ifndef _RTE_LPM_RVV_H_ > > +#define _RTE_LPM_RVV_H_ > > + > > +#include <rte_vect.h> > > + > > +#include <rte_cpuflags.h> > > +#include <riscv_vector.h> > > + > > +#ifdef __cplusplus > > +extern "C" { > > +#endif > > + > > +#define RTE_LPM_LOOKUP_SUCCESS 0x01000000 > > +#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000 > > + > > +static inline void rte_lpm_lookupx4( > > + const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv) > > +{ > > + size_t vl = 4; > > + > > + const uint32_t *tbl24_p = (const uint32_t *)lpm->tbl24; > > + uint32_t tbl_entries[4] = { > > + tbl24_p[((uint32_t)ip[0]) >> 8], > > + tbl24_p[((uint32_t)ip[1]) >> 8], > > + tbl24_p[((uint32_t)ip[2]) >> 8], > > + tbl24_p[((uint32_t)ip[3]) >> 8], > > + }; > > + vuint32m1_t vtbl_entry = __riscv_vle32_v_u32m1(tbl_entries, vl); > > + > > + vbool32_t mask = __riscv_vmseq_vx_u32m1_b32( > > + __riscv_vand_vx_u32m1(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl), > > + RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl); > > + > > + vuint32m1_t vtbl8_index = __riscv_vsll_vx_u32m1( > > + __riscv_vadd_vv_u32m1( > > + __riscv_vsll_vx_u32m1(__riscv_vand_vx_u32m1(vtbl_entry, 0x00FFFFFF, vl), 8, vl), > > + __riscv_vand_vx_u32m1( > > + __riscv_vle32_v_u32m1((const uint32_t *)&ip, vl), 0x000000FF, vl), > > + vl), > > + 2, vl); > > + > > + vtbl_entry = __riscv_vluxei32_v_u32m1_mu( > > + mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl); > > + > > + vuint32m1_t vnext_hop = __riscv_vand_vx_u32m1(vtbl_entry, 0x00FFFFFF, vl); > > + mask = __riscv_vmseq_vx_u32m1_b32( > > + __riscv_vand_vx_u32m1(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl); > > + > > + vnext_hop = 
__riscv_vmerge_vxm_u32m1(vnext_hop, defv, mask, vl); > > + > > + __riscv_vse32_v_u32m1(hop, vnext_hop, vl); > > +} > > + > > +#ifdef __cplusplus > > +} > > +#endif > > + > > +#endif /* _RTE_LPM_RVV_H_ */ > > -- > Regards, > Vladimir