================
@@ -3,165 +3,463 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
+#include "builtin_test_helpers.h"
// Codegen test for the merge-masked form: forwards its arguments unchanged to
// _mm256_mask_dpbusd_epi32. The directives in the body expect a call to the
// 256-bit vpdpbusd LLVM intrinsic followed by a select on an <8 x i1> mask.
__m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A,
                                     __m256i __B) {
  // CHECK-LABEL: test_mm256_mask_dpbusd_epi32
  // Each directive must sit on one comment line; a wrapped directive leaves
  // its operand patterns outside the comment.
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B);
}
+// Constant-evaluation test for _mm256_mask_dpbusd_epi32. Every 32-bit lane
+// uses different bytes and the mask 0xA5 is asymmetric, so a lane, byte or
+// mask-bit mix-up in the evaluator changes the result. The per-lane sums of
+// (unsigned byte of 3rd vector) * (signed byte of 4th vector) are
+//   {-3, -7, -11, -15, 74, -90, -4, -6}.
+// Lanes 0, 2, 5 and 7 (mask bits set) add that sum to the accumulator; the
+// other lanes keep the accumulator value.
+TEST_CONSTEXPR(match_v8si(
+    _mm256_mask_dpbusd_epi32(
+        (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+        (__mmask8)0xA5,
+        (__m256i)(__v32qu){ 1,  2,  3,  4,  5,  6,  7,  8,
+                            9, 10, 11, 12, 13, 14, 15, 16,
+                           17, 18, 19, 20, 21, 22, 23, 24,
+                           25, 26, 27, 28, 29, 30, 31, 32},
+        (__m256i)(__v32qi){ 1, -1,  2, -2,  3, -3,  4, -4,
+                            5, -5,  6, -6,  7, -7,  8, -8,
+                            1,  1,  1,  1, -1, -1, -1, -1,
+                            2,  0, -2,  0,  0,  3,  0, -3}),
+    97, 200, 289, 400, 500, 510, 700, 794));
// Codegen test for the zero-masked form: forwards its arguments unchanged to
// _mm256_maskz_dpbusd_epi32. The directives in the body expect a call to the
// 256-bit vpdpbusd LLVM intrinsic followed by a select on an <8 x i1> mask.
__m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A,
                                      __m256i __B) {
  // CHECK-LABEL: test_mm256_maskz_dpbusd_epi32
  // Each directive must sit on one comment line; a wrapped directive leaves
  // its operand patterns outside the comment.
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
+// Constant-evaluation test for _mm256_maskz_dpbusd_epi32. The accumulator is
+// non-zero in every lane so that a zeroed lane can be told apart from a lane
+// that merely kept its accumulator, and the mask 0x5A is asymmetric. The
+// per-lane sums of (unsigned byte of 3rd vector) * (signed byte of 4th
+// vector) are
+//   {-3, -7, -11, -15, 74, -90, -4, -6}.
+// Lanes 1, 3, 4 and 6 (mask bits set) hold accumulator + sum; the other
+// lanes are expected to be zero.
+TEST_CONSTEXPR(match_v8si(
+    _mm256_maskz_dpbusd_epi32(
+        (__mmask8)0x5A,
+        (__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80},
+        (__m256i)(__v32qu){ 1,  2,  3,  4,  5,  6,  7,  8,
+                            9, 10, 11, 12, 13, 14, 15, 16,
+                           17, 18, 19, 20, 21, 22, 23, 24,
+                           25, 26, 27, 28, 29, 30, 31, 32},
+        (__m256i)(__v32qi){ 1, -1,  2, -2,  3, -3,  4, -4,
+                            5, -5,  6, -6,  7, -7,  8, -8,
+                            1,  1,  1,  1, -1, -1, -1, -1,
+                            2,  0, -2,  0,  0,  3,  0, -3}),
+    0, 13, 0, 25, 124, 0, 66, 0));
// Codegen test for the unmasked form: forwards its arguments unchanged to
// _mm256_dpbusd_epi32. The directive in the body expects a call to the
// 256-bit vpdpbusd LLVM intrinsic.
__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_dpbusd_epi32
  // The directive must sit on one comment line; a wrapped directive leaves
  // its operand patterns outside the comment.
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  return _mm256_dpbusd_epi32(__S, __A, __B);
}
+// Constant-evaluation test for _mm256_dpbusd_epi32. Each 32-bit lane covers a
+// different case, so one entry with non-uniform data replaces several
+// uniform ones and a cross-lane mix-up cannot go unnoticed:
+//   lane 0: zero accumulator, small positive bytes      ->  0 + 10
+//   lane 1: accumulator plus negative signed bytes      -> 10 - 70
+//   lane 2: 255 (unsigned) * -1 (signed), four times    -> -1020
+//   lane 3: 255 * 127 four times, accumulator -5        -> 129540 - 5
+//   lane 4: 255 * -128 four times, accumulator 7        -> 7 - 130560
+//   lane 5: INT_MAX + 1 wraps around (no saturation)    -> INT_MIN
+//   lane 6: INT_MIN - 1 wraps around (no saturation)    -> INT_MAX
+//   lane 7: bytes 128 and 200 are read as unsigned      -> 1000 - 72
+TEST_CONSTEXPR(match_v8si(
+    _mm256_dpbusd_epi32(
+        ((__m256i)(__v8si){0, 10, 0, -5, 7, 2147483647, -2147483647-1, 1000}),
+        ((__m256i)(__v32qu){  1,   2,   3,   4,   5,   6,   7,   8,
+                            255, 255, 255, 255, 255, 255, 255, 255,
+                            255, 255, 255, 255,   1,   0,   0,   0,
+                              0,   0,   0,   1, 128,   0, 200,   0}),
+        ((__m256i)(__v32qi){   1,    1,    1,    1,   -1,   -2,   -3,   -4,
+                              -1,   -1,   -1,   -1,  127,  127,  127,  127,
+                            -128, -128, -128, -128,    1,    0,    0,    0,
+                               0,    0,    0,   -1,    1,    5,   -1,    5})),
+    10, -60, -1020, 129535, -130553, -2147483647-1, 2147483647, 928));
----------------
RKSimon wrote:
We've generally tried to avoid uniform values in tests, because they can hide
cross-element mismatches in the evaluations. Using non-uniform values also
lets us reduce the number of TEST_CONSTEXPR entries.
https://github.com/llvm/llvm-project/pull/190549
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits