https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89992
Bug ID: 89992 Summary: Vectorizer is very sensitive to function calls Product: gcc Version: 9.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: middle-end Assignee: unassigned at gcc dot gnu.org Reporter: hjl.tools at gmail dot com Target Milestone: --- [hjl@gnu-cfl-1 xxx]$ cat x.c static __inline unsigned int __get_cpuid_max (unsigned int __ext, unsigned int *__sig) { unsigned int __eax, __ebx, __ecx, __edx; __asm__ ("cpuid\n\t" : "=a" (__eax), "=b" (__ebx), "=c" (__ecx), "=d" (__edx) : "0" (__ext)); if (__sig) *__sig = __ebx; return __eax; } static __inline int __get_cpuid_count (unsigned int __leaf, unsigned int __subleaf, unsigned int *__eax, unsigned int *__ebx, unsigned int *__ecx, unsigned int *__edx) { unsigned int __ext = __leaf & 0x80000000; unsigned int __maxlevel = __get_cpuid_max (__ext, 0); if (__maxlevel == 0 || __maxlevel < __leaf) return 0; __asm__ ("cpuid\n\t" : "=a" (*__eax), "=b" (*__ebx), "=c" (*__ecx), "=d" (*__edx) : "0" (__leaf), "2" (__subleaf)); return 1; } static int avx512f_os_support (void) { unsigned int eax, edx; unsigned int ecx = 0x0; unsigned int mask = 0x2 | 0x4 | 0x20 | 0x40 | 0x80; __asm__ ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (ecx)); return ((eax & mask) == mask); } int foo (void); extern void abort (void); static void do_test (void); int main () { unsigned int eax, ebx, ecx, edx; #ifndef WORK if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)) return 0; #endif if (foo () && avx512f_os_support ()) { do_test (); return 0; } return 0; } float a[16] = {-0.1f, -3.2f, -6.3f, -9.4f, -12.5f, -15.6f, -18.7f, -21.8f, 24.9f, 27.1f, 30.2f, 33.3f, 36.4f, 39.5f, 42.6f, 45.7f}; float b[16] = {-1.2f, 3.4f, -5.6f, 7.8f, -9.0f, 1.0f, -2.0f, 3.0f, -4.0f, -5.0f, 6.0f, 7.0f, -8.0f, -9.0f, 10.0f, 11.0f}; float r[16]; static void do_test (void) { int i; for (i = 0; i < 16; i++) r[i] = a[i] * __builtin_copysignf (1.0f, b[i]); for (i = 0; i < 16; i++) if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i])) abort (); } [hjl@gnu-cfl-1 
xxx]$ make /export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/ -mavx512f -mavx512vl -O2 -ftree-vectorize -fdump-tree-vect-details -S x.c [hjl@gnu-cfl-1 xxx]$ grep vectorized x.c.158t.vect x.c:45:1: note: vectorized 0 loops in function. [hjl@gnu-cfl-1 xxx]$ /export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/ -mavx512f -mavx512vl -O2 -ftree-vectorize -fdump-tree-vect-details -S x.c -DWORK [hjl@gnu-cfl-1 xxx]$ grep vectorized x.c.158t.vect x.c:83:10: missed: not vectorized: control flow in loop. x.c:79:3: note: === vect_mark_stmts_to_be_vectorized === x.c:79:3: optimized: loop vectorized using 64 byte vectors x.c:45:1: note: vectorized 1 loops in function. [hjl@gnu-cfl-1 xxx]$ The vectorizer doesn't kick in when there are a couple of function calls: without -DWORK, the __get_cpuid_count call in main is compiled in and 0 loops are vectorized; with -DWORK, that call is omitted and 1 loop is vectorized.