Hi David,

On 16/11/2021 14:53, David Marchand wrote:
On Fri, Nov 12, 2021 at 3:17 PM Vladimir Medvedkin
<vladimir.medved...@intel.com> wrote:

1. This patch replaces _mm512_set_epi8 with _mm512_set_epi32
due to the lack of support by some compilers.

Ok, it was the initial report from Lance.
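
For illustration of point 1 (not part of the patch): four byte indices packed into each 32-bit lane give the same in-memory layout that _mm512_set_epi8 would produce, so _mm512_set_epi32 can stand in on compilers that lack the byte variant. A minimal sketch, assuming AVX-512F is enabled at compile time (e.g. -mavx512f):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Bytes (-3, -2, -1, 0) for the lowest lane, expressed as the
	 * 32-bit constant 0x00FFFEFD; negative byte indices simply show
	 * up as 0xFF, 0xFE, ... inside the 32-bit value.
	 */
	const __m512i v = _mm512_set1_epi32(0x00FFFEFD);
	int8_t b[64];

	_mm512_storeu_si512(b, v);
	/* Prints "-3 -2 -1 0", the same bytes _mm512_set_epi8(..., 0,
	 * -1, -2, -3) would place in the lowest lane.
	 */
	printf("%d %d %d %d\n", b[0], b[1], b[2], b[3]);
	return 0;
}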

2. This patch checks that AVX512F is supported along with GFNI,
to cover builds for platforms that support GFNI but not AVX512.

Ok.

3. Also this patch fixes compilation problems on 32-bit architectures,
where _mm_extract_epi64() is not available, by implementing the XOR
folding with _mm_extract_epi32() instead.

This code is under a #if defined(__GFNI__) && defined(__AVX512F__).

Does such a 32-bit processor exist that supports AVX512 and GFNI?



Regardless of whether such a processor exists, the 32-bit build breaks without this fix, because _mm_extract_epi64() is only available when building for 64-bit targets.
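
A self-contained sketch of the folding approach (not the patch itself; it uses __x86_64__ in place of DPDK's RTE_ARCH_X86_64 so it compiles standalone, and it needs SSE4.1 for the extract intrinsics):

#include <immintrin.h>
#include <stdint.h>

/* Fold a 128-bit XOR accumulator into two 32-bit values. */
static inline void
xor_fold_128(__m128i v, uint32_t *val_1, uint32_t *val_2)
{
#ifdef __x86_64__
	/* 64-bit targets: two 64-bit extracts, then split the result. */
	uint64_t t = (uint64_t)_mm_extract_epi64(v, 0) ^
		(uint64_t)_mm_extract_epi64(v, 1);

	*val_1 = (uint32_t)t;
	*val_2 = (uint32_t)(t >> 32);
#else
	/* 32-bit targets: _mm_extract_epi64() is unavailable, so fold
	 * the four 32-bit elements instead.
	 */
	*val_1 = (uint32_t)_mm_extract_epi32(v, 0) ^
		(uint32_t)_mm_extract_epi32(v, 2);
	*val_2 = (uint32_t)_mm_extract_epi32(v, 1) ^
		(uint32_t)_mm_extract_epi32(v, 3);
#endif
}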


Fixes: 4fd8c4cb0de1 ("hash: add new Toeplitz hash implementation")
Cc: vladimir.medved...@intel.com

Signed-off-by: Vladimir Medvedkin <vladimir.medved...@intel.com>
Acked-by: Lance Richardson <lance.richard...@broadcom.com>
Acked-by: Ji, Kai <kai...@intel.com>
---
  lib/hash/rte_thash_x86_gfni.h | 44 ++++++++++++++++++++---------------
  1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/lib/hash/rte_thash_x86_gfni.h b/lib/hash/rte_thash_x86_gfni.h
index c2889c3734..987dec4988 100644
--- a/lib/hash/rte_thash_x86_gfni.h
+++ b/lib/hash/rte_thash_x86_gfni.h
@@ -18,7 +18,7 @@
  extern "C" {
  #endif

-#ifdef __GFNI__
+#if defined(__GFNI__) && defined(__AVX512F__)

Please update the #endif comments accordingly, or remove the invalid/obsolete
comment about __GFNI__.


Sure, will do.
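
For example, the guard could end up looking roughly like this (sketch only, not the final v3):

/* Comment on the closing #endif now matches the combined condition. */
#if defined(__GFNI__) && defined(__AVX512F__)

#define RTE_THASH_GFNI_DEFINED

/* ... GFNI/AVX512 implementation ... */

#endif /* __GFNI__ && __AVX512F__ */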


  #define RTE_THASH_GFNI_DEFINED

  #define RTE_THASH_FIRST_ITER_MSK       0x0f0f0f0f0f0e0c08
@@ -33,7 +33,6 @@ __rte_thash_xor_reduce(__m512i xor_acc, uint32_t *val_1, uint32_t *val_2)
  {
         __m256i tmp_256_1, tmp_256_2;
         __m128i tmp128_1, tmp128_2;
-       uint64_t tmp_1, tmp_2;

         tmp_256_1 = _mm512_castsi512_si256(xor_acc);
         tmp_256_2 = _mm512_extracti32x8_epi32(xor_acc, 1);
@@ -43,12 +42,24 @@ __rte_thash_xor_reduce(__m512i xor_acc, uint32_t *val_1, uint32_t *val_2)
         tmp128_2 = _mm256_extracti32x4_epi32(tmp_256_1, 1);
         tmp128_1 = _mm_xor_si128(tmp128_1, tmp128_2);

+#ifdef RTE_ARCH_X86_64
+       uint64_t tmp_1, tmp_2;
         tmp_1 = _mm_extract_epi64(tmp128_1, 0);
         tmp_2 = _mm_extract_epi64(tmp128_1, 1);
         tmp_1 ^= tmp_2;

         *val_1 = (uint32_t)tmp_1;
         *val_2 = (uint32_t)(tmp_1 >> 32);
+#else
+       uint32_t tmp_1, tmp_2;
+       tmp_1 = _mm_extract_epi32(tmp128_1, 0);
+       tmp_2 = _mm_extract_epi32(tmp128_1, 1);
+       tmp_1 ^= _mm_extract_epi32(tmp128_1, 2);
+       tmp_2 ^= _mm_extract_epi32(tmp128_1, 3);
+
+       *val_1 = tmp_1;
+       *val_2 = tmp_2;
+#endif
  }

  __rte_internal
@@ -56,23 +67,18 @@ static inline __m512i
  __rte_thash_gfni(const uint64_t *mtrx, const uint8_t *tuple,
         const uint8_t *secondary_tuple, int len)
  {
-       __m512i permute_idx = _mm512_set_epi8(7, 6, 5, 4, 7, 6, 5, 4,
-                                               6, 5, 4, 3, 6, 5, 4, 3,
-                                               5, 4, 3, 2, 5, 4, 3, 2,
-                                               4, 3, 2, 1, 4, 3, 2, 1,
-                                               3, 2, 1, 0, 3, 2, 1, 0,
-                                               2, 1, 0, -1, 2, 1, 0, -1,
-                                               1, 0, -1, -2, 1, 0, -1, -2,
-                                               0, -1, -2, -3, 0, -1, -2, -3);
-
-       const __m512i rewind_idx = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-                                               0, 0, 0, 0, 0, 0, 0, 0,
-                                               0, 0, 0, 0, 0, 0, 0, 0,
-                                               0, 0, 0, 0, 0, 0, 0, 0,
-                                               0, 0, 0, 0, 0, 0, 0, 0,
-                                               0, 0, 0, 59, 0, 0, 0, 59,
-                                               0, 0, 59, 58, 0, 0, 59, 58,
-                                               0, 59, 58, 57, 0, 59, 58, 57);
+       __m512i permute_idx = _mm512_set_epi32(0x7060504, 0x7060504,

Nit: fully expanded 32-bit values are easier to read, e.g. 0x07060504
instead of 0x7060504.
Etc...


Will fix in v3.
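
For reference, the zero-padded form of the two initializers would read as follows (formatting sketch only, values unchanged):

	__m512i permute_idx = _mm512_set_epi32(0x07060504, 0x07060504,
						0x06050403, 0x06050403,
						0x05040302, 0x05040302,
						0x04030201, 0x04030201,
						0x03020100, 0x03020100,
						0x020100FF, 0x020100FF,
						0x0100FFFE, 0x0100FFFE,
						0x00FFFEFD, 0x00FFFEFD);
	const __m512i rewind_idx = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0,
						0, 0, 0x0000003B, 0x0000003B,
						0x00003B3A, 0x00003B3A,
						0x003B3A39, 0x003B3A39);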


+                                               0x6050403, 0x6050403,
+                                               0x5040302, 0x5040302,
+                                               0x4030201, 0x4030201,
+                                               0x3020100, 0x3020100,
+                                               0x20100FF, 0x20100FF,
+                                               0x100FFFE, 0x100FFFE,
+                                               0xFFFEFD, 0xFFFEFD);
+       const __m512i rewind_idx = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0,
+                                                       0, 0, 0x3B, 0x3B,
+                                                       0x3B3A, 0x3B3A,
+                                                       0x3B3A39, 0x3B3A39);
         const __mmask64 rewind_mask = RTE_THASH_REWIND_MSK;
         const __m512i shift_8 = _mm512_set1_epi8(8);
         __m512i xor_acc = _mm512_setzero_si512();
--
2.25.1

--
Regards,
Vladimir
