https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95019

            Bug ID: 95019
           Summary: Optimizer produces suboptimal code related to
                    -ftree-ivopts
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: zhongyunde at tom dot com
  Target Milestone: ---

For the following code, we can see that the variable C000005A1 is only used as
the index into the arrays Dest and Src, and the element size of the arrays is
8 bytes, so an iv variable with step 8 would be good for targets whose
load/store insns cannot fold the left-shift operand.

typedef unsigned long long UINT64;

/*
 * Reproducer: for each index in [0, len), store Src[C000005A1] squared into
 * Dest[C000005A1]. C000005A1 starts at 0 and is used only as the subscript
 * of Src and Dest; it grows by (len - index) after every iteration, so the
 * increment is len, len - 1, ..., 1.
 */
void C00000ADA(UINT64 len, long long *__restrict Src, long long *__restrict
Dest)
{
    /* C00000ADD, C00000068, offset and C00000ADF are declared but never used. */
    UINT64 C00000ADD, index, C00000068, offset, C00000ADF;
    UINT64 C000005A1 = 0;

    for (index = 0; index < len; index++) {

        Dest[C000005A1] =  Src[C000005A1] * Src[C000005A1];
        /* Advance the subscript by a stride that shrinks by one each pass. */
        C000005A1 += len - index;
    }
}

Tested with MIPS64 gcc 5.4 on https://gcc.godbolt.org. The MIPS64 target has
no load/store form that folds the left shift into the address operand (unlike,
for example, 'ldr     x3, [x1, x4, lsl 3]' on ARM64 targets), so using an
ivtmp with step 8 can eliminate the dsll insn from the kernel loop.

@@ -2,16 +2,17 @@ C00000ADA(unsigned long long, long long*, long long*):
         beq     $4,$0,.L10         #, len,,
         move    $7,$0    # C000005A1,

+        dsll    $8,$4,3  # tmp, len << 3  
+
 .L4:
-        dsll    $2,$7,3  # D.2019, C000005A1,
-        daddu   $3,$5,$2       # tmp204, Src, D.2019
+        daddu   $3,$5,$7       # tmp204, Src, D.2019
         ld      $3,0($3)     # D.2021, *_10
-        daddu   $2,$6,$2       # tmp205, Dest, D.2019
+        daddu   $2,$6,$7       # tmp205, Dest, D.2019
         dmult   $3,$3  # D.2021, D.2021
         daddu   $7,$7,$4       # C000005A1, C000005A1, ivtmp.6
-        daddiu  $4,$4,-1     # ivtmp.6, ivtmp.6,
+        daddiu  $4,$4,-8     # ivtmp.6, ivtmp.6,
         mflo    $3       # D.2021
-        bne     $4,$0,.L4  #, ivtmp.6,,
+        bne     $8,$0,.L4  #, ivtmp.6,,
         sd      $3,0($2)     # D.2021, *_8

 .L10:

Reply via email to