https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43725

Marat Zakirov <m.zakirov at samsung dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |joseph at codesourcery dot com,
                   |                            |m.zakirov at samsung dot com

--- Comment #7 from Marat Zakirov <m.zakirov at samsung dot com> ---
Another NEON register allocation issue.

Code:

#include <arm_neon.h>
#include <inttypes.h>

extern  uint16x8x4_t m0;
extern  uint16x8x4_t m1;

/*
 * Reproducer for the questions asked in this report.
 *
 * Loads two uint16x8x4_t values with vld4q_u16, one starting at in_ptr[0]
 * and one starting at in_ptr[64], multiplies each of the four vectors in
 * both values by 333, and stores the results to the extern globals m0 and m1.
 */
void foo(uint16_t * in_ptr)
{
    uint16x8x4_t t0, t1;
    t0 = vld4q_u16((uint16_t *)&in_ptr[0 ]);
    t1 = vld4q_u16((uint16_t *)&in_ptr[64]);
    /* Scale all four vectors of t0 by the same constant. */
    t0.val[0] *= 333;
    t0.val[1] *= 333;
    t0.val[2] *= 333;
    t0.val[3] *= 333;
    /* Same scaling for all four vectors of t1. */
    t1.val[0] *= 333;
    t1.val[1] *= 333;
    t1.val[2] *= 333;
    t1.val[3] *= 333;
    /* Write both results out to the globals declared extern above. */
    m0 = t0;
    m1 = t1;
}

Asm file:

       .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
        add     r1, r0, #160
        vld4.16 {d8, d10, d12, d14}, [r0]
        add     r0, r0, #32
        .pad #64
        sub     sp, sp, #64
        vld4.16 {d16, d18, d20, d22}, [r2]
        movw    r3, #:lower16:m1
        movw    r2, #:lower16:m0
        vldr    d6, .L3
        vldr    d7, .L3+8
        movt    r3, #:upper16:m1
        movt    r2, #:upper16:m0
        vld4.16 {d9, d11, d13, d15}, [r0]
        vld4.16 {d17, d19, d21, d23}, [r1]
        vmul.i16        q12, q3, q4
        vstmia  sp, {d16-d23}      <<< *
        vld1.64 {d4-d5}, [sp:64]   <<< *
        vmul.i16        q13, q3, q5  <<< **
        vmul.i16        q9, q3, q9   
        vmul.i16        q14, q3, q6  <<< **
        vmul.i16        q10, q3, q10
        vmul.i16        q8, q3, q2   <<< **, ***
        vmul.i16        q15, q3, q7  <<< **
        vmul.i16        q11, q3, q11
        vstmia  r2, {d24-d31}
        vstmia  r3, {d16-d23}
        add     sp, sp, #64
        @ sp needed
        fldmfdd sp!, {d8-d15}
        bx      lr

So my questions are:
1) Why do we need * and why did the compiler use q2 in *** ?
2) Why didn't the compiler reuse registers q5, q6, q2, q7 in ** ?

Command line:

cc1 -quiet -v t.c -quiet -dumpbase t.c -mfpu=neon -mcpu=cortex-a15
-mfloat-abi=softfp -marm -mtls-dialect=gnu -auxbase-strip t.s -O3
-Wno-error=unused-local-typedefs -version -fdump-tree-all -fdump-rtl-all
-funwind-tables -o t.s

gcc version = 4.10.0
--build=x86_64-pc-linux-gnu
--host=x86_64-pc-linux-gnu
--target=arm-v7a15v5r2-linux-gnueabi

--Marat

Reply via email to