http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46012

           Summary: 256bit vectorizer failed on int->double
           Product: gcc
           Version: 4.6.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: hjl.to...@gmail.com
                CC: rgue...@gcc.gnu.org


For the following test case

---
double a[1024];
float b[1024];
int c[1024];

void dependence_distance_4_mixed_0 (void)
{
  int i;
  for (i = 0; i < 1020; ++i)
    a[i + 4] = a[i] + a[i + 4] + c[i];
}
---

compiled with -O3 -ffast-math -mavx, the vect256 branch generates:

.L2:
    vmovapd    a(%rax,%rax), %ymm0
    vcvtdq2pd    c(%rax), %ymm1
    vaddpd    a+32(%rax,%rax), %ymm0, %ymm0
    vaddpd    %ymm1, %ymm0, %ymm0
    vmovapd    %ymm0, a+32(%rax,%rax)
    addq    $16, %rax
    cmpq    $4080, %rax
    jne    .L2

Trunk at revision 165455 generates:

.L2:
    vmovapd    16(%rax), %xmm2
    vaddpd    -16(%rax), %xmm2, %xmm2
    vmovdqa    (%rdx), %xmm0
    addq    $16, %rdx
    vpshufd    $238, %xmm0, %xmm1
    vcvtdq2pd    %xmm0, %xmm0
    vcvtdq2pd    %xmm1, %xmm1
    vaddpd    %xmm1, %xmm2, %xmm1
    vmovapd    (%rax), %xmm2
    vaddpd    -32(%rax), %xmm2, %xmm2
    vmovapd    %xmm1, 16(%rax)
    vaddpd    %xmm0, %xmm2, %xmm0
    vmovapd    %xmm0, (%rax)
    addq    $32, %rax
    cmpq    %rax, %rcx
    jne    .L2

Reply via email to