> >> without the ' -mavx2' option for gcc, there are compiling error: > >> '__m256i undeclared', the __attribute__((target("avx2"))) can't solve > >> this issue. Any idea? > > > > You're right that you can't use the normal __m256i, as it doesn't get > > declared. > > It should be declared. *intrin.h uses #pragma GCC target and always defines > all vector types. > > In fact, the following compiles for me with just "gcc foo.c" under GCC 5.x: > > #include <immintrin.h> > > // #if defined CONFIG_IFUNC && defined CONFIG_AVX2 #pragma GCC > push_options #pragma GCC target("avx2") > #define AVX2_VECTYPE __m256i > #define AVX2_SPLAT(p) _mm256_set1_epi8(*(p)) > #define AVX2_ALL_EQ(v1, v2) \ > (_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF) > #define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2)) > > size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len) { > const AVX2_VECTYPE *p = buf; > const AVX2_VECTYPE zero = (AVX2_VECTYPE){0}; > size_t i; > > if (!len) { > return 0; > } > > for (i = 0; i < 4; i++) { > if (!AVX2_ALL_EQ(p[i], zero)) { > return i * sizeof(AVX2_VECTYPE); > } > } > > for (i = 4; i < len / sizeof(AVX2_VECTYPE); i += 4) { > AVX2_VECTYPE tmp0 = AVX2_VEC_OR(p[i + 0], p[i + 1]); > AVX2_VECTYPE tmp1 = AVX2_VEC_OR(p[i + 2], p[i + 3]); > AVX2_VECTYPE tmp2 = AVX2_VEC_OR(p[i + 4], p[i + 5]); > AVX2_VECTYPE tmp3 = AVX2_VEC_OR(p[i + 6], p[i + 7]); > AVX2_VECTYPE tmp01 = AVX2_VEC_OR(tmp0, tmp1); > AVX2_VECTYPE tmp23 = AVX2_VEC_OR(tmp2, tmp3); > if (!AVX2_ALL_EQ(AVX2_VEC_OR(tmp01, tmp23), zero)) { > break; > } > } > > return i * sizeof(AVX2_VECTYPE); > } > > #pragma GCC pop_options > // #endif > > so perhaps the configure test is testing the wrong thing? > > Paolo
Hi Paolo, what's your opinion? Should we put the AVX2-related code in util/cutils.c and use the "#pragma ..." approach you referred to? The configure test is OK; it uses "-mavx2". Liang