adream307 wrote:

I have checked this on actual AVX-512 hardware, using the following test program:
```cpp
#include <stdio.h>
#include <immintrin.h>

/* Print the eight 16-bit lanes of v as comma-terminated decimal numbers,
 * followed by a newline. */
void printv8hu(__v8hu v) {
  int lane = 0;
  while (lane < 8) {
    printf("%d,", v[lane]);
    lane++;
  }
  printf("\n");
}

/* Print the sixteen 16-bit lanes of v as comma-terminated decimal numbers,
 * followed by a newline. */
void printv16hu(__v16hu v) {
  int lane = 0;
  while (lane < 16) {
    printf("%d,", v[lane]);
    lane++;
  }
  printf("\n");
}

/*
 * Print reference results of the MPSADBW intrinsics for a fixed pair of
 * operands and a range of immediates:
 *
 *   - 8 lines:  _mm_mpsadbw_epu8    with imm8 = 0..7
 *   - 8 lines:  _mm256_mpsadbw_epu8 with imm8 = 0..7
 *   - 8 lines:  _mm256_mpsadbw_epu8 with imm8 = (0..7) << 3
 *
 * Per the Intel intrinsics documentation, imm8 bits [1:0] select the 4-byte
 * block of the second operand and bit 2 selects the starting byte offset
 * (0 or 4) in the first operand; for the 256-bit form, bits [5:3] play the
 * same role for the upper 128-bit lane.
 *
 * The immediate has to be a compile-time constant, so every call is spelled
 * out rather than generated by a loop. The operands are identical across
 * calls and are therefore defined once.
 */
int main() {
  /* Operands shared by all 128-bit calls. */
  const __m128i a128 = (__m128i)(__v16qu){11, 13, 17, 19, 23, 29, 31, 37,
                                          41, 43, 47, 53, 59, 61, 67, 71};
  const __m128i b128 = (__m128i)(__v16qu){167, 173, 179, 181, 191, 193, 197, 199,
                                          211, 223, 227, 229, 233, 239, 241, 251};

  /* Operands shared by all 256-bit calls. */
  const __m256i a256 = (__m256i)(__v32qu){2,   3,   5,   7,   11,  13,  17,  19,
                                          23,  29,  31,  37,  41,  43,  47,  53,
                                          59,  61,  67,  71,  73,  79,  83,  89,
                                          97,  101, 103, 107, 109, 113, 127, 131};
  const __m256i b256 = (__m256i)(__v32qu){83,  89,  97,  101, 103, 107, 109, 113,
                                          127, 131, 137, 139, 149, 151, 157, 163,
                                          167, 173, 179, 181, 191, 193, 197, 199,
                                          211, 223, 227, 229, 233, 239, 241, 251};

  /* 128-bit form, imm8 = 0..7. */
  printv8hu((__v8hu)_mm_mpsadbw_epu8(a128, b128, 0));
  printv8hu((__v8hu)_mm_mpsadbw_epu8(a128, b128, 1));
  printv8hu((__v8hu)_mm_mpsadbw_epu8(a128, b128, 2));
  printv8hu((__v8hu)_mm_mpsadbw_epu8(a128, b128, 3));
  printv8hu((__v8hu)_mm_mpsadbw_epu8(a128, b128, 4));
  printv8hu((__v8hu)_mm_mpsadbw_epu8(a128, b128, 5));
  printv8hu((__v8hu)_mm_mpsadbw_epu8(a128, b128, 6));
  printv8hu((__v8hu)_mm_mpsadbw_epu8(a128, b128, 7));

  /* 256-bit form, imm8 = 0..7: only bits [2:0] vary. */
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 0));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 1));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 2));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 3));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 4));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 5));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 6));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 7));

  /* 256-bit form, imm8 = (0..7) << 3: only bits [5:3] vary. */
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 0 << 3));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 1 << 3));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 2 << 3));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 3 << 3));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 4 << 3));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 5 << 3));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 6 << 3));
  printv16hu((__v16hu)_mm256_mpsadbw_epu8(a256, b256, 7 << 3));

  return 0;
}
```
This is the output:
```txt
640,628,612,598,580,562,548,532,
720,708,692,678,660,642,628,612,
830,818,802,788,770,752,738,722,
904,892,876,862,844,826,812,796,
580,562,548,532,516,498,480,460,
660,642,628,612,596,578,560,540,
770,752,738,722,706,688,670,650,
844,826,812,796,780,762,744,724,
353,344,334,322,310,298,282,268,442,428,410,394,376,352,330,310,
415,406,396,384,372,360,344,330,442,428,410,394,376,352,330,310,
517,508,498,486,474,462,446,432,442,428,410,394,376,352,330,310,
603,594,584,572,560,548,532,518,442,428,410,394,376,352,330,310,
310,298,282,268,250,232,218,202,442,428,410,394,376,352,330,310,
372,360,344,330,312,294,280,264,442,428,410,394,376,352,330,310,
474,462,446,432,414,396,382,366,442,428,410,394,376,352,330,310,
560,548,532,518,500,482,468,452,442,428,410,394,376,352,330,310,
353,344,334,322,310,298,282,268,442,428,410,394,376,352,330,310,
353,344,334,322,310,298,282,268,522,508,490,474,456,432,410,390,
353,344,334,322,310,298,282,268,632,618,600,584,566,542,520,500,
353,344,334,322,310,298,282,268,706,692,674,658,640,616,594,574,
353,344,334,322,310,298,282,268,376,352,330,310,292,280,268,244,
353,344,334,322,310,298,282,268,456,432,410,390,372,360,348,324,
353,344,334,322,310,298,282,268,566,542,520,500,482,470,458,434,
353,344,334,322,310,298,282,268,640,616,594,574,556,544,532,508,

```

https://github.com/llvm/llvm-project/pull/202257
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to