Hi Ciara,

On 30/09/2020 14:04, Ciara Power wrote:
When choosing the vector path, max SIMD bitwidth is now checked to
ensure a vector path is allowable. To do this, rather than the vector
lookup functions being called directly from apps, a generic lookup
function is called which will call the vector functions if suitable.

Signed-off-by: Ciara Power <ciara.po...@intel.com>
---
  lib/librte_lpm/rte_lpm.h         | 57 ++++++++++++++++++++++++++------
  lib/librte_lpm/rte_lpm_altivec.h |  2 +-
  lib/librte_lpm/rte_lpm_neon.h    |  2 +-
  lib/librte_lpm/rte_lpm_sse.h     |  2 +-
  4 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index 03da2d37e0..edba7cafd5 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -397,8 +397,18 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
  /* Mask four results. */
  #define        RTE_LPM_MASKX4_RES     UINT64_C(0x00ffffff00ffffff)
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+#include "rte_lpm_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "rte_lpm_altivec.h"
+#else
+#include "rte_lpm_sse.h"
+#endif
+
  /**
- * Lookup four IP addresses in an LPM table.
+ * Lookup four IP addresses in an LPM table individually by calling the
+ * lookup function for each ip. This is used when lookupx4 is called but
+ * the vector path is not suitable.
   *
   * @param lpm
   *   LPM object handle
@@ -417,16 +427,43 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
   *   if lookup would fail.
   */
  static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
-       uint32_t defv);
+rte_lpm_lookupx4_scalar(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+       uint32_t defv)
+{
+       int i;
+       for (i = 0; i < 4; i++)
+               if (rte_lpm_lookup(lpm, ((uint32_t *) &ip)[i], &hop[i]) < 0)
+                       hop[i] = defv; /* lookupx4 expected to set on failure */
+}
-#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
-#include "rte_lpm_neon.h"
-#elif defined(RTE_ARCH_PPC_64)
-#include "rte_lpm_altivec.h"
-#else
-#include "rte_lpm_sse.h"
-#endif
+/**
+ * Lookup four IP addresses in an LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   Four IPs to be looked up in the LPM table
+ * @param hop
+ *   Next hop of the most specific rule found for IP (valid on lookup hit only).
+ *   This is a 4-element array of two-byte values.
+ *   If the lookup was successful for the given IP, then the least significant
+ *   byte of the corresponding element is the actual next hop and the most
+ *   significant byte is zero.
+ *   If the lookup for the given IP failed, then the corresponding element would
+ *   contain the default value; see the description of the next parameter.
+ * @param defv
+ *   Default value to populate into corresponding element of hop[] array,
+ *   if lookup would fail.
+ */
+static inline void
+rte_lpm_lookupx4(struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+       uint32_t defv)
+{
+       if (rte_get_max_simd_bitwidth() >= RTE_MAX_128_SIMD)
+               rte_lpm_lookupx4_vec(lpm, ip, hop, defv);
+       else
+               rte_lpm_lookupx4_scalar(lpm, ip, hop, defv);
+}

I'm afraid this will lead to a drop in performance. rte_lpm_lookupx4 is used in the hot path, and a bulk size is too small to amortize the cost of adding this extra logic.

#ifdef __cplusplus
  }
diff --git a/lib/librte_lpm/rte_lpm_altivec.h b/lib/librte_lpm/rte_lpm_altivec.h
index 228c41b38e..82142d3351 100644
--- a/lib/librte_lpm/rte_lpm_altivec.h
+++ b/lib/librte_lpm/rte_lpm_altivec.h
@@ -16,7 +16,7 @@ extern "C" {
  #endif
static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
        uint32_t defv)
  {
        vector signed int i24;
diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
index 6c131d3125..14b184515d 100644
--- a/lib/librte_lpm/rte_lpm_neon.h
+++ b/lib/librte_lpm/rte_lpm_neon.h
@@ -16,7 +16,7 @@ extern "C" {
  #endif
static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
        uint32_t defv)
  {
        uint32x4_t i24;
diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
index 44770b6ff8..cb5477c6cf 100644
--- a/lib/librte_lpm/rte_lpm_sse.h
+++ b/lib/librte_lpm/rte_lpm_sse.h
@@ -15,7 +15,7 @@ extern "C" {
  #endif
static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+rte_lpm_lookupx4_vec(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
        uint32_t defv)
  {
        __m128i i24;


--
Regards,
Vladimir

Reply via email to