When compiling with the following CVS HEAD snapshot from 30.12.2004

---------------------------
# gcc -v
Using built-in specs.
Configured with: ../../../gcc-CVS-20041230/gcc-CVS-20041230/configure
--host=i686-pc-linux-gnu --prefix=/usr/local/opt/gcc-4.0
--exec-prefix=/usr/local/opt/gcc-4.0 --sysconfdir=/etc
--libdir=/usr/local/opt/gcc-4.0/lib --libexecdir=/usr/local/opt/gcc-4.0/libexec
--sharedstatedir=/var --localstatedir=/var --program-suffix=-4.0
--with-x-includes=/usr/X11R6/include --with-x-libraries=/usr/X11R6/lib
--enable-shared --enable-static --with-gnu-as --with-gnu-ld --enable-libada
--with-stabs --enable-threads=posix --enable-version-specific-runtime-libs
--disable-coverage --enable-gather-detailed-mem-stats --disable-libgcj
--disable-checking --enable-multilib --with-x --enable-cmath
--enable-libstdcxx-debug --enable-fast-character --enable-hash-synchronization
--enable-languages=c,c++,f95,objc,ada --with-system-zlib --with-libbanshee
--with-demangler-in-ld --with-arch=athlon-xp
Thread model: posix
gcc version 4.0.0 20041230 (experimental)
---------------------------

the following test code

-- test.c -----------------
/* Record of four doubles; the element type of t2_t's g[] array. */
typedef struct t1 { double a,b,c,d; } t1_t;
/* One double e followed by an array of four t1_t records. */
typedef struct t2 { double e; t1_t g[4]; } t2_t;
/* File-scope pointer with no initializer; main() passes its address to f(). */
t2_t *H;

/*
 * Store the value of h->e into the a, b, c and d members of each of the
 * four elements of h->g[].
 */
void f (t2_t *h)
{
        int i;

        for (i=0; i<4; i++)
        {
                /* Each assignment reads h->e and writes one double member. */
                h->g[i].a=h->e;
                h->g[i].b=h->e;
                h->g[i].c=h->e;
                h->g[i].d=h->e;
        }
}

/*
 * Call f() on the address of H and return 0.
 * NOTE(review): &H has type t2_t **, whereas f() is declared to take a
 * t2_t *. The call is kept exactly as reported because the disassembly
 * quoted in this report is of this exact test case.
 */
int main (void)
{
        f(&H);
        return 0;
}
--------------------------------

compiled with

--------------------------------
gcc -O3 -march=athlon-xp -msse -mfpmath=sse -c test.c -o test.o
--------------------------------

produces the following code (listed by objdump, because it also shows the hex codes).

--------------------------------
test.o:     file format elf32-i386

Disassembly of section .text:

00000000 <f>:
   0:   55                      push   %ebp
   1:   89 e5                   mov    %esp,%ebp
   3:   8b 55 08                mov    0x8(%ebp),%edx
   6:   8d 8a 80 00 00 00       lea    0x80(%edx),%ecx
   c:   89 d0                   mov    %edx,%eax
   e:   89 f6                   mov    %esi,%esi
  10:   f3 0f 7e 02             movq   (%edx),%xmm0
  14:   66 0f d6 40 08          movq   %xmm0,0x8(%eax)
  19:   f3 0f 7e 02             movq   (%edx),%xmm0
  1d:   66 0f d6 40 10          movq   %xmm0,0x10(%eax)
  22:   f3 0f 7e 02             movq   (%edx),%xmm0
  26:   66 0f d6 40 18          movq   %xmm0,0x18(%eax)
  2b:   f3 0f 7e 02             movq   (%edx),%xmm0
  2f:   66 0f d6 40 20          movq   %xmm0,0x20(%eax)
  34:   83 c0 20                add    $0x20,%eax
  37:   39 c8                   cmp    %ecx,%eax
  39:   75 d5                   jne    10 <f+0x10>
  3b:   c9                      leave
  3c:   c3                      ret
  3d:   8d 76 00                lea    0x0(%esi),%esi

00000040 <main>:
  40:   55                      push   %ebp
  41:   b8 00 00 00 00          mov    $0x0,%eax
  46:   89 e5                   mov    %esp,%ebp
  48:   83 ec 08                sub    $0x8,%esp
  4b:   83 e4 f0                and    $0xfffffff0,%esp
  4e:   83 ec 10                sub    $0x10,%esp
  51:   f3 0f 7e 05 00 00 00    movq   0x0,%xmm0
  58:   00
  59:   66 0f d6 40 08          movq   %xmm0,0x8(%eax)
  5e:   f3 0f 7e 05 00 00 00    movq   0x0,%xmm0
  65:   00
  66:   66 0f d6 40 10          movq   %xmm0,0x10(%eax)
  6b:   f3 0f 7e 05 00 00 00    movq   0x0,%xmm0
  72:   00
  73:   66 0f d6 40 18          movq   %xmm0,0x18(%eax)
  78:   f3 0f 7e 05 00 00 00    movq   0x0,%xmm0
  7f:   00
  80:   66 0f d6 40 20          movq   %xmm0,0x20(%eax)
  85:   83 c0 20                add    $0x20,%eax
  88:   3d 80 00 00 00          cmp    $0x80,%eax
  8d:   75 c2                   jne    51 <main+0x11>
  8f:   c9                      leave
  90:   31 c0                   xor    %eax,%eax
  92:   c3                      ret
------------------------------------

All instructions at offsets 10-2f and 51-80 belong to the SSE2 instruction set
(at least according to the AMD documentation) and are therefore not supported
by the AthlonXP, which fails to run this code with an illegal instruction error.
I also tried adding -mno-sse2, but with no luck.
GCC 3.4.1 doesn't seem to have this problem.

-- 
           Summary: GCC generates SSE2 instructions for AthlonXP which
                    doesn't support them.
           Product: gcc
           Version: 4.0.0
            Status: UNCONFIRMED
          Severity: critical
          Priority: P1
         Component: rtl-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: drab at kepler dot fjfi dot cvut dot cz
                CC: gcc-bugs at gcc dot gnu dot org
 GCC build triplet: i686-pc-linux-gnu
  GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19235

Reply via email to