https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96031

            Bug ID: 96031
           Summary: suboptimal codegen for store low 16-bits value
           Product: gcc
           Version: 8.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: zhongyunde at tom dot com
  Target Milestone: ---

In the following code, the strh instruction stores only the low 16 bits of its
source register, so the 'and     w2, w2, 65535 ' is redundant.
This was tested with ARM64 gcc 8.2 on https://gcc.godbolt.org/, which produces
the fairly complicated assembly shown below.

/* Local aliases for the unsigned integer types used by the reproducer. */
typedef unsigned int UINT32;
typedef unsigned short UINT16;


/* Destination buffer written by foo(); 12 elements of type UINT16. */
UINT16 array[12];

/*
 * Store index * step into array[index] for every index in [1, len).
 *
 * The product is computed in UINT32 and converted to UINT16 by the
 * assignment, so only its low-order bits are kept (the low 16 bits,
 * assuming unsigned short is 16 bits wide on the target).
 *
 * array[0] is never written, and len is not checked against the size of
 * array (12 elements) anywhere in this function.
 */
void foo (UINT32 len, UINT32 step)              
{
    UINT32 index = 1;   /* set to 1 again by the for-initializer below */

    for (index = 1 ; index < len; index++ )
        {
            /* implicit UINT32 -> UINT16 conversion on the store */
            array[index] = index * step;
        }
}

// the assembly of the kernel loop body --------------------------
        b       .L4         //
.L6:
        add     x3, x3, 2 // ivtmp.6, ivtmp.6,
.L4:
        strh    w2, [x4, 2]     // ivtmp.4, MEM[base: _2, offset: 2B]
        add     w2, w1, w2        // tmp105, _12, ivtmp.4
        and     w2, w2, 65535     // ivtmp.4, tmp105 ????
        cmp     x3, x0    // ivtmp.6, _23
        mov     x4, x3    // ivtmp.6, ivtmp.6
        bne     .L6             //,

Reply via email to