https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69748
Bug ID: 69748 Summary: SIGSEGV received on vmovdqa instruction during avx-vectorized array initialization Product: gcc Version: 4.9.2 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ Assignee: unassigned at gcc dot gnu.org Reporter: vladyslav.buslov at harmonicinc dot com Target Milestone: --- Created attachment 37652 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=37652&action=edit Example program Recently we switched to -march=core-avx2 and got an unexpected crash during array initialization in a class constructor. The problem only appears when __attribute__((__aligned__(64))) is specified for a structure that contains an instance of that class. According to the generated asm code, it seems that gcc assumes that the array memory is always aligned to 32 bytes and doesn't generate prologue code to process the starting bytes up to the first aligned address. I was able to reproduce this issue with a very simple, if somewhat contrived, example program. Relevant info: ccap@daa-52:~/Sandbox/vb$ g++ -std=gnu++11 -g -v -m64 -pthread -march=core-avx2 -O3 -Wall -Wextra -save-temps testavx.cpp Using built-in specs. 
COLLECT_GCC=g++ COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.9/lto-wrapper Target: x86_64-linux-gnu Configured with: ../src/configure -v --with-pkgversion='Debian 4.9.2-10' --with-bugurl=file:///usr/share/doc/gcc-4.9/README.Bugs --enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-4.9 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.9 --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.9-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.9-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.9-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --with-arch-32=i586 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu Thread model: posix gcc version 4.9.2 (Debian 4.9.2-10) COLLECT_GCC_OPTIONS='-std=gnu++11' '-g' '-v' '-m64' '-pthread' '-march=core-avx2' '-O3' '-Wall' '-Wextra' '-save-temps' '-shared-libgcc' /usr/lib/gcc/x86_64-linux-gnu/4.9/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D_REENTRANT testavx.cpp -m64 -march=core-avx2 -std=gnu++11 -Wall -Wextra -g -fworking-directory -O3 -fpch-preprocess -o testavx.ii ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/4.9" ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu" ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../../x86_64-linux-gnu/include" #include "..." 
search starts here: #include <...> search starts here: /usr/include/c++/4.9 /usr/include/x86_64-linux-gnu/c++/4.9 /usr/include/c++/4.9/backward /usr/lib/gcc/x86_64-linux-gnu/4.9/include /usr/local/include /usr/lib/gcc/x86_64-linux-gnu/4.9/include-fixed /usr/include/x86_64-linux-gnu /usr/include End of search list. COLLECT_GCC_OPTIONS='-std=gnu++11' '-g' '-v' '-m64' '-pthread' '-march=core-avx2' '-O3' '-Wall' '-Wextra' '-save-temps' '-shared-libgcc' /usr/lib/gcc/x86_64-linux-gnu/4.9/cc1plus -fpreprocessed testavx.ii -quiet -dumpbase testavx.cpp -m64 -march=core-avx2 -auxbase testavx -g -O3 -Wall -Wextra -std=gnu++11 -version -o testavx.s GNU C++ (Debian 4.9.2-10) version 4.9.2 (x86_64-linux-gnu) compiled by GNU C version 4.9.2, GMP version 6.0.0, MPFR version 3.1.2-p3, MPC version 1.0.2 GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 GNU C++ (Debian 4.9.2-10) version 4.9.2 (x86_64-linux-gnu) compiled by GNU C version 4.9.2, GMP version 6.0.0, MPFR version 3.1.2-p3, MPC version 1.0.2 GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 Compiler executable checksum: ff85870e740eb08c48a56c2c170390f8 COLLECT_GCC_OPTIONS='-std=gnu++11' '-g' '-v' '-m64' '-pthread' '-march=core-avx2' '-O3' '-Wall' '-Wextra' '-save-temps' '-shared-libgcc' as -v --64 -o testavx.o testavx.s GNU assembler version 2.25 (x86_64-linux-gnu) using BFD version (GNU Binutils for Debian) 2.25 COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.9/:/usr/lib/gcc/x86_64-linux-gnu/4.9/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/4.9/:/usr/lib/gcc/x86_64-linux-gnu/ LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.9/:/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../:/lib/:/usr/lib/ COLLECT_GCC_OPTIONS='-std=gnu++11' '-g' '-v' '-m64' '-pthread' '-march=core-avx2' '-O3' 
'-Wall' '-Wextra' '-save-temps' '-shared-libgcc' /usr/lib/gcc/x86_64-linux-gnu/4.9/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/4.9/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/4.9/lto-wrapper -plugin-opt=-fresolution=testavx.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lpthread -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --sysroot=/ --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu -dynamic-linker /lib64/ld-linux-x86-64.so.2 /usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/4.9/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/4.9 -L/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/4.9/../../.. testavx.o -lstdc++ -lm -lgcc_s -lgcc -lpthread -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/4.9/crtend.o /usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/crtn.o ccap@daa-52:~/Sandbox/vb$ ./a.out Segmentation fault (core dumped) Coredump analysis: Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Core was generated by `./a.out'. Program terminated with signal SIGSEGV, Segmentation fault. 
#0 0x0000000000400a6a in PoolWrapper (this=0x2338040) at testavx.cpp:19 19 elems[i].next = i+1; (gdb) disas /m PoolWrapper Dump of assembler code for function CoreData::CoreData(): 17 next_ = 0; 0x00000000004009f1 <+49>: movl $0x0,0x30(%rbx) 0x00000000004009f8 <+56>: lea 0x40(%rbx),%rax 0x00000000004009fc <+60>: vmovdqa 0x17c(%rip),%ymm6 # 0x400b80 18 for (std::size_t i = 0; i < 24000; i++) { 19 elems[i].next = i+1; 0x0000000000400a04 <+68>: vmovdqa 0x154(%rip),%ymm3 # 0x400b60 0x0000000000400a0c <+76>: movl $0x1,0x34(%rbx) 0x0000000000400a13 <+83>: lea 0x17720(%rbx),%rdx 0x0000000000400a1a <+90>: vmovdqa 0x17e(%rip),%ymm5 # 0x400ba0 0x0000000000400a22 <+98>: movl $0x2,0x38(%rbx) 0x0000000000400a29 <+105>: vmovdqa 0x18f(%rip),%ymm4 # 0x400bc0 0x0000000000400a31 <+113>: movl $0x3,0x3c(%rbx) 0x0000000000400a38 <+120>: nopl 0x0(%rax,%rax,1) 0x0000000000400a40 <+128>: vpaddq %ymm5,%ymm3,%ymm1 0x0000000000400a44 <+132>: add $0x20,%rax 0x0000000000400a48 <+136>: vperm2i128 $0x20,%ymm1,%ymm3,%ymm0 0x0000000000400a4e <+142>: vperm2i128 $0x31,%ymm1,%ymm3,%ymm2 0x0000000000400a54 <+148>: vpshufd $0xd8,%ymm0,%ymm1 0x0000000000400a59 <+153>: vpaddq %ymm6,%ymm3,%ymm3 0x0000000000400a5d <+157>: vpshufd $0xd8,%ymm2,%ymm0 0x0000000000400a62 <+162>: vpunpcklqdq %ymm0,%ymm1,%ymm0 0x0000000000400a66 <+166>: vpaddd %ymm0,%ymm4,%ymm0 => 0x0000000000400a6a <+170>: vmovdqa %ymm0,-0x20(%rax) 0x0000000000400a6f <+175>: cmp %rdx,%rax 0x0000000000400a72 <+178>: jne 0x400a40 <CoreData::CoreData()+128> 0x0000000000400a74 <+180>: movl $0x5dbc,0x17720(%rbx) 0x0000000000400a7e <+190>: movl $0x5dbd,0x17724(%rbx) 0x0000000000400a88 <+200>: movl $0x5dbe,0x17728(%rbx) 0x0000000000400a92 <+210>: movl $0x5dbf,0x1772c(%rbx) 0x0000000000400a9c <+220>: movl $0x5dc0,0x17730(%rbx) 0x0000000000400aa6 <+230>: vzeroupper 20 } 21 } 22 23 }; 24 25 struct CoreData { 0x00000000004009c0 <+0>: lea 0x8(%rsp),%r10 0x00000000004009c5 <+5>: and $0xffffffffffffffe0,%rsp 0x00000000004009c9 <+9>: xor %eax,%eax 
0x00000000004009cb <+11>: pushq -0x8(%r10) 0x00000000004009cf <+15>: mov $0x6,%ecx 0x00000000004009d4 <+20>: push %rbp 0x00000000004009d5 <+21>: mov $0x17700,%edx 0x00000000004009da <+26>: mov %rsp,%rbp 0x00000000004009dd <+29>: xor %esi,%esi 0x00000000004009df <+31>: push %r10 0x00000000004009e1 <+33>: push %rbx 0x00000000004009e2 <+34>: mov %rdi,%rbx 0x00000000004009e5 <+37>: rep stos %rax,%es:(%rdi) 0x00000000004009e8 <+40>: lea 0x34(%rbx),%rdi ---Type <return> to continue, or q <return> to quit---q Quit (gdb) info registers rax rax 0x2338070 36929648 (gdb)