I am using MMX built-ins with gcc-4.0-20050825 and I am seeing a lot of unneeded
movq instructions being generated. I don't know which gcc snapshot introduced
this, but I know that some pre-release gcc 4.0 didn't show this bad behaviour.

BTW, this is using gcc built-ins. The situation is much worse when using
intrinsics via mmintrin.h. (Again, old pre-4.0 gcc didn't have the problem;
using gcc built-ins or mmintrin.h intrinsics made no difference; both generated
nice code.)

LC_ALL=C i686-pc-linux-gnu-gcc-4.0.2-beta20050825 -v
Using built-in specs.
Target: i686-pc-linux-gnu
Configured with:
/var/tmp/portage/gcc-4.0.2_beta20050825/work/gcc-4.0-20050825/configure
--prefix=/usr --bindir=/usr/i686-pc-linux-gnu/gcc-bin/4.0.2-beta20050825
--includedir=/usr/lib/gcc/i686-pc-linux-gnu/4.0.2-beta20050825/include
--datadir=/usr/share/gcc-data/i686-pc-linux-gnu/4.0.2-beta20050825
--mandir=/usr/share/gcc-data/i686-pc-linux-gnu/4.0.2-beta20050825/man
--infodir=/usr/share/gcc-data/i686-pc-linux-gnu/4.0.2-beta20050825/info
--with-gxx-include-dir=/usr/lib/gcc/i686-pc-linux-gnu/4.0.2-beta20050825/include/g++-v4
--host=i686-pc-linux-gnu --build=i686-pc-linux-gnu --disable-altivec
--enable-nls --without-included-gettext --with-system-zlib --disable-checking
--disable-werror --disable-libunwind-exceptions --disable-multilib
--disable-libgcj --enable-languages=c,c++ --enable-shared --enable-threads=posix
--enable-__cxa_atexit --enable-clocale=gnu
Thread model: posix
gcc version 4.0.2-beta20050825 (Gentoo 4.0.2_beta20050825)


i686-pc-linux-gnu-gcc-4.0.2-beta20050825 mixaudio16.c -save-temps -c -O2
-march=athlon-xp

Source:

/* 8-byte GCC vector types used as operands of the MMX built-ins below. */
/* 8-byte vector of int lanes; holds the widened intermediate sums. */
typedef int v2si __attribute__ ((vector_size (8)));
/* Declared identically to v2si; only used to cast the operands of
   __builtin_ia32_pand (presumably named after that built-in's DImode
   operand type -- confirm against the GCC version in use). */
typedef int di __attribute__ ((vector_size (8)));
/* 8-byte vector of short lanes; holds the input samples and the sign mask. */
typedef short v4hi __attribute__ ((vector_size (8)));

/*
 * Reproducer: adds one 8-byte group of 16-bit lanes read from src1 to one
 * read from src2 and stores the 8-byte result to dst, using GCC MMX built-ins.
 *
 * Each input is widened to 32-bit lanes by interleaving it with a per-lane
 * sign mask, the widened halves are added with paddd, and the sums are
 * narrowed back to 16 bits with packssdw (signed saturation per the MMX
 * instruction definition).
 *
 * src1, src2: each is read as a single v4hi (8 bytes).
 * dst:        receives a single v4hi (8 bytes).
 *
 * NOTE(review): the pointers are dereferenced through v4hi casts with no
 * alignment or length checks; callers presumably guarantee at least 8
 * readable/writable bytes -- confirm against the caller.
 */
void MixAudio16_MMX_T(char* src1, char* src2, char* dst)
{
        
        v4hi indata;    /* current group of input lanes */
        v4hi signmask;  /* per-lane sign-extension word for indata */
                
        v2si loout;     /* running sum built from the punpcklwd results */
        v2si hiout;     /* running sum built from the punpckhwd results */
        
        v2si temp;      /* widened half of the second input */

        /* Four lanes with only the top bit (0x8000) set; m views the same
           storage as one v4hi so it can be used directly as an MMX operand. */
        __attribute__((aligned(16))) static const short sm[4] =
{0x8000,0x8000,0x8000,0x8000};
        static const v4hi *m = (v4hi*)sm;

        /* First input: isolate each lane's sign bit, then compare against the
           mask so a lane becomes all-ones when its sign bit was set and zero
           otherwise. */
        indata   = *(v4hi*)src1;
        signmask = (v4hi)__builtin_ia32_pand((di)indata, *(di*)m);
        signmask = __builtin_ia32_pcmpeqw(signmask, *m);
        /* Interleave data words with their sign words, producing 32-bit
           lanes from the low and high halves of indata respectively. */
        loout = (v2si)__builtin_ia32_punpcklwd(indata, signmask);
        hiout = (v2si)__builtin_ia32_punpckhwd(indata, signmask);
        
        /* Second input: same sign-mask construction as above. */
        indata   = *(v4hi*)src2;
        signmask = (v4hi)__builtin_ia32_pand((di)indata, *(di*)m);
        signmask = __builtin_ia32_pcmpeqw(signmask, *m);

        /* Widen each half of the second input and accumulate it into the
           matching half of the first. */
        temp  = (v2si)__builtin_ia32_punpcklwd(indata, signmask);
        loout = __builtin_ia32_paddd(loout, temp);
        temp  = (v2si)__builtin_ia32_punpckhwd(indata, signmask);
        hiout = __builtin_ia32_paddd(hiout, temp);
                
        /* Narrow both 32-bit halves back to four 16-bit lanes and store. */
        *(v4hi*)dst = __builtin_ia32_packssdw(loout, hiout);
        /* Issue emms after the MMX work (the instruction that releases the
           MMX state for later x87 floating-point use). */
        __builtin_ia32_emms();
        
        return;
}

assembler:

00002e50 <MixAudio16_MMX_T>:
    2e50:       55                      push   %ebp
    2e51:       89 e5                   mov    %esp,%ebp
    2e53:       83 ec 10                sub    $0x10,%esp
    2e56:       8b 15 04 00 00 00       mov    0x4,%edx
    2e5c:       8b 45 08                mov    0x8(%ebp),%eax
    2e5f:       0f 6f 10                movq   (%eax),%mm2
    2e62:       0f 6f ca                movq   %mm2,%mm1
    2e65:       8b 45 0c                mov    0xc(%ebp),%eax
    2e68:       0f 7f 55 f8             movq   %mm2,0xfffffff8(%ebp)
    2e6c:       0f 6f 45 f8             movq   0xfffffff8(%ebp),%mm0
    2e70:       0f db 02                pand   (%edx),%mm0
    2e73:       0f 7f 45 f0             movq   %mm0,0xfffffff0(%ebp)
    2e77:       0f 6f 45 f0             movq   0xfffffff0(%ebp),%mm0
    2e7b:       0f 75 02                pcmpeqw (%edx),%mm0
    2e7e:       0f 61 c8                punpcklwd %mm0,%mm1
    2e81:       0f 69 d0                punpckhwd %mm0,%mm2
    2e84:       0f 7f 4d f8             movq   %mm1,0xfffffff8(%ebp)
    2e88:       0f 6f 5d f8             movq   0xfffffff8(%ebp),%mm3
    2e8c:       0f 7f 55 f8             movq   %mm2,0xfffffff8(%ebp)
    2e90:       0f 6f 10                movq   (%eax),%mm2
    2e93:       0f 6f 65 f8             movq   0xfffffff8(%ebp),%mm4
    2e97:       0f 7f 55 f8             movq   %mm2,0xfffffff8(%ebp)
    2e9b:       0f 6f 45 f8             movq   0xfffffff8(%ebp),%mm0
    2e9f:       0f 6f ca                movq   %mm2,%mm1
    2ea2:       0f db 02                pand   (%edx),%mm0
    2ea5:       8b 45 10                mov    0x10(%ebp),%eax
    2ea8:       0f 7f 45 f0             movq   %mm0,0xfffffff0(%ebp)
    2eac:       0f 6f 45 f0             movq   0xfffffff0(%ebp),%mm0
    2eb0:       0f 75 02                pcmpeqw (%edx),%mm0
    2eb3:       0f 61 c8                punpcklwd %mm0,%mm1
    2eb6:       0f 69 d0                punpckhwd %mm0,%mm2
    2eb9:       0f 7f 4d f8             movq   %mm1,0xfffffff8(%ebp)
    2ebd:       0f fe 5d f8             paddd  0xfffffff8(%ebp),%mm3
    2ec1:       0f 7f 55 f8             movq   %mm2,0xfffffff8(%ebp)
    2ec5:       0f fe 65 f8             paddd  0xfffffff8(%ebp),%mm4
    2ec9:       0f 6b dc                packssdw %mm4,%mm3
    2ecc:       0f 7f 18                movq   %mm3,(%eax)
    2ecf:       0f 77                   emms
    2ed1:       c9                      leave
    2ed2:       c3                      ret
    2ed3:       8d b6 00 00 00 00       lea    0x0(%esi),%esi
    2ed9:       8d bc 27 00 00 00 00    lea    0x0(%edi),%edi

-- 
           Summary: built-ins MMX regression
           Product: gcc
           Version: 4.0.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: prakash at punnoor dot de
                CC: gcc-bugs at gcc dot gnu dot org
GCC target triplet: i?86-*-*


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23630

Reply via email to