Hi Sun,

You did not address my previous comments regarding the commit message. You can move everything you've written in this commit message into a note and add a meaningful description of what the commit does, for example (please correct if needed):

"Implement LPM lookupx4 routine for RISC-V architecture using RISC-V Vector Extension instruction set"

Everything else (performance tests, implementation thoughts and considerations, etc.) should be in the patch notes. For more information on what "patch notes" are, you may want to refer to the Git documentation [1].

[1] https://git-scm.com/docs/git-notes

On 05/06/2025 11:58, u...@foxmail.com wrote:

From: Sun Yuechi <sunyue...@iscas.ac.cn>

The initialization of vtbl_entry is not fully vectorized here because
doing so would require __riscv_vluxei32_v_u32m1, which is slower
than the scalar approach in this small-scale scenario.

- Test: app/test/lpm_perf_autotest
- Platform: Banana Pi(BPI-F3)
- SoC: Spacemit X60 (8 cores with Vector extension)
- CPU Frequency: up to 1.6 GHz
- Cache: 256 KiB L1d ×8, 256 KiB L1i ×8, 1 MiB L2 ×2
- Memory: 16 GiB
- Kernel: Linux 6.6.36
- Compiler: GCC 14.2.0 (with RVV intrinsic support)

Test results(LPM LookupX4):
     scalar: 5.7 cycles
     rvv:    4.6 cycles

Signed-off-by: Sun Yuechi <sunyue...@iscas.ac.cn>
---
  MAINTAINERS           |  2 ++
  lib/lpm/meson.build   |  1 +
  lib/lpm/rte_lpm.h     |  2 ++
  lib/lpm/rte_lpm_rvv.h | 62 +++++++++++++++++++++++++++++++++++++++++++
  4 files changed, 67 insertions(+)
  create mode 100644 lib/lpm/rte_lpm_rvv.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3e16789250..0f207ac129 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -340,6 +340,8 @@ M: Stanislaw Kardach <stanislaw.kard...@gmail.com>
  F: config/riscv/
  F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
  F: lib/eal/riscv/
+M: sunyuechi <sunyue...@iscas.ac.cn>
+F: lib/**/*rvv*
Intel x86
  M: Bruce Richardson <bruce.richard...@intel.com>
diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build
index fae4f79fb9..09133061e5 100644
--- a/lib/lpm/meson.build
+++ b/lib/lpm/meson.build
@@ -17,6 +17,7 @@ indirect_headers += files(
          'rte_lpm_scalar.h',
          'rte_lpm_sse.h',
          'rte_lpm_sve.h',
+        'rte_lpm_rvv.h',
  )
  deps += ['hash']
  deps += ['rcu']
diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h
index 7df64f06b1..b06517206f 100644
--- a/lib/lpm/rte_lpm.h
+++ b/lib/lpm/rte_lpm.h
@@ -408,6 +408,8 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, 
uint32_t hop[4],
  #include "rte_lpm_altivec.h"
  #elif defined(RTE_ARCH_X86)
  #include "rte_lpm_sse.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+#include "rte_lpm_rvv.h"
  #else
  #include "rte_lpm_scalar.h"
  #endif
diff --git a/lib/lpm/rte_lpm_rvv.h b/lib/lpm/rte_lpm_rvv.h
new file mode 100644
index 0000000000..5f48fb2b32
--- /dev/null
+++ b/lib/lpm/rte_lpm_rvv.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences 
(ISCAS).
+ */
+
+#ifndef _RTE_LPM_RVV_H_
+#define _RTE_LPM_RVV_H_
+
+#include <rte_vect.h>
+
+#include <rte_cpuflags.h>
+#include <riscv_vector.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_LPM_LOOKUP_SUCCESS 0x01000000
+#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
+
+static inline void rte_lpm_lookupx4(
+       const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv)
+{
+       size_t vl = 4;
+
+       const uint32_t *tbl24_p = (const uint32_t *)lpm->tbl24;
+       uint32_t tbl_entries[4] = {
+               tbl24_p[((uint32_t)ip[0]) >> 8],
+               tbl24_p[((uint32_t)ip[1]) >> 8],
+               tbl24_p[((uint32_t)ip[2]) >> 8],
+               tbl24_p[((uint32_t)ip[3]) >> 8],
+       };
+       vuint32m1_t vtbl_entry = __riscv_vle32_v_u32m1(tbl_entries, vl);
+
+       vbool32_t mask = __riscv_vmseq_vx_u32m1_b32(
+           __riscv_vand_vx_u32m1(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, 
vl),
+           RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
+
+       vuint32m1_t vtbl8_index = __riscv_vsll_vx_u32m1(
+           __riscv_vadd_vv_u32m1(
+               __riscv_vsll_vx_u32m1(__riscv_vand_vx_u32m1(vtbl_entry, 
0x00FFFFFF, vl), 8, vl),
+               __riscv_vand_vx_u32m1(
+                   __riscv_vle32_v_u32m1((const uint32_t *)&ip, vl), 
0x000000FF, vl),
+               vl),
+           2, vl);
+
+       vtbl_entry = __riscv_vluxei32_v_u32m1_mu(
+           mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);
+
+       vuint32m1_t vnext_hop = __riscv_vand_vx_u32m1(vtbl_entry, 0x00FFFFFF, 
vl);
+       mask = __riscv_vmseq_vx_u32m1_b32(
+           __riscv_vand_vx_u32m1(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, 
vl);
+
+       vnext_hop = __riscv_vmerge_vxm_u32m1(vnext_hop, defv, mask, vl);
+
+       __riscv_vse32_v_u32m1(hop, vnext_hop, vl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_RVV_H_ */

--
Regards,
Vladimir

Reply via email to