https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69748

            Bug ID: 69748
           Summary: SIGSEGV received on vmovdqa instruction during
                    avx-vectorized array initialization
           Product: gcc
           Version: 4.9.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vladyslav.buslov at harmonicinc dot com
  Target Milestone: ---

Created attachment 37652
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=37652&action=edit
Example program

Recently we switched to -march=core-avx2 and got an unexpected crash during
array initialization in a class constructor.
The problem only appears when __attribute__((__aligned__(64))) is specified for
the structure that contains an instance of that class.
According to the generated asm code, it seems that gcc assumes that the array
memory is always aligned to 32 bytes and doesn't generate prologue code to
process the starting bytes up to the aligned address.

I was able to reproduce this issue with a very simple, contrived example
program.

Relevant info:
ccap@daa-52:~/Sandbox/vb$ g++ -std=gnu++11 -g -v -m64 -pthread -march=core-avx2
-O3 -Wall -Wextra -save-temps testavx.cpp
Using built-in specs.
COLLECT_GCC=g++
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.9/lto-wrapper
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 4.9.2-10'
--with-bugurl=file:///usr/share/doc/gcc-4.9/README.Bugs
--enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr
--program-suffix=-4.9 --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--with-gxx-include-dir=/usr/include/c++/4.9 --libdir=/usr/lib --enable-nls
--with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug
--enable-libstdcxx-time=yes --enable-gnu-unique-object --disable-vtable-verify
--enable-plugin --with-system-zlib --disable-browser-plugin
--enable-java-awt=gtk --enable-gtk-cairo
--with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.9-amd64/jre --enable-java-home
--with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.9-amd64
--with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.9-amd64
--with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar
--enable-objc-gc --enable-multiarch --with-arch-32=i586 --with-abi=m64
--with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic
--enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu
--target=x86_64-linux-gnu
Thread model: posix
gcc version 4.9.2 (Debian 4.9.2-10) 
COLLECT_GCC_OPTIONS='-std=gnu++11' '-g' '-v' '-m64' '-pthread'
'-march=core-avx2' '-O3' '-Wall' '-Wextra' '-save-temps' '-shared-libgcc'
 /usr/lib/gcc/x86_64-linux-gnu/4.9/cc1plus -E -quiet -v -imultiarch
x86_64-linux-gnu -D_GNU_SOURCE -D_REENTRANT testavx.cpp -m64 -march=core-avx2
-std=gnu++11 -Wall -Wextra -g -fworking-directory -O3 -fpch-preprocess -o
testavx.ii
ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/4.9"
ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"
ignoring nonexistent directory
"/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../../x86_64-linux-gnu/include"
#include "..." search starts here:
#include <...> search starts here:
 /usr/include/c++/4.9
 /usr/include/x86_64-linux-gnu/c++/4.9
 /usr/include/c++/4.9/backward
 /usr/lib/gcc/x86_64-linux-gnu/4.9/include
 /usr/local/include
 /usr/lib/gcc/x86_64-linux-gnu/4.9/include-fixed
 /usr/include/x86_64-linux-gnu
 /usr/include
End of search list.
COLLECT_GCC_OPTIONS='-std=gnu++11' '-g' '-v' '-m64' '-pthread'
'-march=core-avx2' '-O3' '-Wall' '-Wextra' '-save-temps' '-shared-libgcc'
 /usr/lib/gcc/x86_64-linux-gnu/4.9/cc1plus -fpreprocessed testavx.ii -quiet
-dumpbase testavx.cpp -m64 -march=core-avx2 -auxbase testavx -g -O3 -Wall
-Wextra -std=gnu++11 -version -o testavx.s
GNU C++ (Debian 4.9.2-10) version 4.9.2 (x86_64-linux-gnu)
        compiled by GNU C version 4.9.2, GMP version 6.0.0, MPFR version
3.1.2-p3, MPC version 1.0.2
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
GNU C++ (Debian 4.9.2-10) version 4.9.2 (x86_64-linux-gnu)
        compiled by GNU C version 4.9.2, GMP version 6.0.0, MPFR version
3.1.2-p3, MPC version 1.0.2
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
Compiler executable checksum: ff85870e740eb08c48a56c2c170390f8
COLLECT_GCC_OPTIONS='-std=gnu++11' '-g' '-v' '-m64' '-pthread'
'-march=core-avx2' '-O3' '-Wall' '-Wextra' '-save-temps' '-shared-libgcc'
 as -v --64 -o testavx.o testavx.s
GNU assembler version 2.25 (x86_64-linux-gnu) using BFD version (GNU Binutils
for Debian) 2.25
COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.9/:/usr/lib/gcc/x86_64-linux-gnu/4.9/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/4.9/:/usr/lib/gcc/x86_64-linux-gnu/
LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/4.9/:/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../:/lib/:/usr/lib/
COLLECT_GCC_OPTIONS='-std=gnu++11' '-g' '-v' '-m64' '-pthread'
'-march=core-avx2' '-O3' '-Wall' '-Wextra' '-save-temps' '-shared-libgcc'
 /usr/lib/gcc/x86_64-linux-gnu/4.9/collect2 -plugin
/usr/lib/gcc/x86_64-linux-gnu/4.9/liblto_plugin.so
-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/4.9/lto-wrapper
-plugin-opt=-fresolution=testavx.res -plugin-opt=-pass-through=-lgcc_s
-plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lpthread
-plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s
-plugin-opt=-pass-through=-lgcc --sysroot=/ --build-id --eh-frame-hdr -m
elf_x86_64 --hash-style=gnu -dynamic-linker /lib64/ld-linux-x86-64.so.2
/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/crt1.o
/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/crti.o
/usr/lib/gcc/x86_64-linux-gnu/4.9/crtbegin.o
-L/usr/lib/gcc/x86_64-linux-gnu/4.9
-L/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu
-L/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../../lib -L/lib/x86_64-linux-gnu
-L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib
-L/usr/lib/gcc/x86_64-linux-gnu/4.9/../../.. testavx.o -lstdc++ -lm -lgcc_s
-lgcc -lpthread -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/4.9/crtend.o
/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/crtn.o
ccap@daa-52:~/Sandbox/vb$ ./a.out
Segmentation fault (core dumped)

Coredump analysis:
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `./a.out'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x0000000000400a6a in PoolWrapper (this=0x2338040) at testavx.cpp:19
19                  elems[i].next = i+1;
(gdb) disas /m PoolWrapper
Dump of assembler code for function CoreData::CoreData():
17              next_ = 0;
   0x00000000004009f1 <+49>:    movl   $0x0,0x30(%rbx)
   0x00000000004009f8 <+56>:    lea    0x40(%rbx),%rax
   0x00000000004009fc <+60>:    vmovdqa 0x17c(%rip),%ymm6        # 0x400b80

18              for (std::size_t i = 0; i < 24000; i++) {
19                  elems[i].next = i+1;
   0x0000000000400a04 <+68>:    vmovdqa 0x154(%rip),%ymm3        # 0x400b60
   0x0000000000400a0c <+76>:    movl   $0x1,0x34(%rbx)
   0x0000000000400a13 <+83>:    lea    0x17720(%rbx),%rdx
   0x0000000000400a1a <+90>:    vmovdqa 0x17e(%rip),%ymm5        # 0x400ba0
   0x0000000000400a22 <+98>:    movl   $0x2,0x38(%rbx)
   0x0000000000400a29 <+105>:   vmovdqa 0x18f(%rip),%ymm4        # 0x400bc0
   0x0000000000400a31 <+113>:   movl   $0x3,0x3c(%rbx)
   0x0000000000400a38 <+120>:   nopl   0x0(%rax,%rax,1)
   0x0000000000400a40 <+128>:   vpaddq %ymm5,%ymm3,%ymm1
   0x0000000000400a44 <+132>:   add    $0x20,%rax
   0x0000000000400a48 <+136>:   vperm2i128 $0x20,%ymm1,%ymm3,%ymm0
   0x0000000000400a4e <+142>:   vperm2i128 $0x31,%ymm1,%ymm3,%ymm2
   0x0000000000400a54 <+148>:   vpshufd $0xd8,%ymm0,%ymm1
   0x0000000000400a59 <+153>:   vpaddq %ymm6,%ymm3,%ymm3
   0x0000000000400a5d <+157>:   vpshufd $0xd8,%ymm2,%ymm0
   0x0000000000400a62 <+162>:   vpunpcklqdq %ymm0,%ymm1,%ymm0
   0x0000000000400a66 <+166>:   vpaddd %ymm0,%ymm4,%ymm0
=> 0x0000000000400a6a <+170>:   vmovdqa %ymm0,-0x20(%rax)
   0x0000000000400a6f <+175>:   cmp    %rdx,%rax
   0x0000000000400a72 <+178>:   jne    0x400a40 <CoreData::CoreData()+128>
   0x0000000000400a74 <+180>:   movl   $0x5dbc,0x17720(%rbx)
   0x0000000000400a7e <+190>:   movl   $0x5dbd,0x17724(%rbx)
   0x0000000000400a88 <+200>:   movl   $0x5dbe,0x17728(%rbx)
   0x0000000000400a92 <+210>:   movl   $0x5dbf,0x1772c(%rbx)
   0x0000000000400a9c <+220>:   movl   $0x5dc0,0x17730(%rbx)
   0x0000000000400aa6 <+230>:   vzeroupper 

20              }
21          }
22      
23      };
24      
25      struct CoreData {
   0x00000000004009c0 <+0>:     lea    0x8(%rsp),%r10
   0x00000000004009c5 <+5>:     and    $0xffffffffffffffe0,%rsp
   0x00000000004009c9 <+9>:     xor    %eax,%eax
   0x00000000004009cb <+11>:    pushq  -0x8(%r10)
   0x00000000004009cf <+15>:    mov    $0x6,%ecx
   0x00000000004009d4 <+20>:    push   %rbp
   0x00000000004009d5 <+21>:    mov    $0x17700,%edx
   0x00000000004009da <+26>:    mov    %rsp,%rbp
   0x00000000004009dd <+29>:    xor    %esi,%esi
   0x00000000004009df <+31>:    push   %r10
   0x00000000004009e1 <+33>:    push   %rbx
   0x00000000004009e2 <+34>:    mov    %rdi,%rbx
   0x00000000004009e5 <+37>:    rep stos %rax,%es:(%rdi)
   0x00000000004009e8 <+40>:    lea    0x34(%rbx),%rdi
---Type <return> to continue, or q <return> to quit---q
Quit
(gdb) info registers rax
rax            0x2338070        36929648
(gdb)

Reply via email to